From 2d7eff5ca9221e01d698932d17cacc501db30d23 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse Date: Tue, 19 May 2026 15:37:32 -0400 Subject: [PATCH 01/43] test: add Phase 0 safety net before restructure Locks in current behavior so the upcoming package restructure can be verified one move at a time: - tests/unit/: pure-Python tests for SmoothedCameraman, SpeakerTracker, VideoEditor filter sanitization + zoompan enforcement, generate_srt / format_srt_block / hex_to_ass_color, create_hook_image (real PIL), and translate.SUPPORTED_LANGUAGES. - tests/api/: FastAPI TestClient contract checks. Captures the full openapi.json into tests/snapshots/baseline.openapi.json (32 routes) so any drift across the restructure fails loudly. - tests/e2e/: real-ffmpeg pipeline smoke test, skipped unless a fixture video and all production deps are present. - conftest.py stubs heavy ML deps (cv2, mediapipe, ultralytics, torch, yt_dlp, scenedetect, google.genai, faster_whisper) via sys.modules so unit + api tests run on a stock laptop. - requirements-dev.txt + pyproject.toml pull pytest, respx, fastapi, pillow, boto3, etc. and configure the e2e marker. 
Result on the unchanged flat codebase: pytest -m \"not e2e\" -> 62 passed in 0.6s Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 5 + pyproject.toml | 12 + requirements-dev.txt | 13 + tests/__init__.py | 0 tests/api/__init__.py | 0 tests/api/test_openapi_contract.py | 121 ++ tests/conftest.py | 143 ++ tests/e2e/__init__.py | 0 tests/e2e/test_pipeline_smoke.py | 108 ++ tests/fixtures/README.md | 33 + tests/snapshots/.gitkeep | 0 tests/snapshots/baseline.openapi.json | 2174 ++++++++++++++++++++++++ tests/unit/__init__.py | 0 tests/unit/test_filter_sanitization.py | 87 + tests/unit/test_hook_image.py | 88 + tests/unit/test_srt_generation.py | 123 ++ tests/unit/test_tracking.py | 175 ++ tests/unit/test_translate_languages.py | 40 + 18 files changed, 3122 insertions(+) create mode 100644 pyproject.toml create mode 100644 requirements-dev.txt create mode 100644 tests/__init__.py create mode 100644 tests/api/__init__.py create mode 100644 tests/api/test_openapi_contract.py create mode 100644 tests/conftest.py create mode 100644 tests/e2e/__init__.py create mode 100644 tests/e2e/test_pipeline_smoke.py create mode 100644 tests/fixtures/README.md create mode 100644 tests/snapshots/.gitkeep create mode 100644 tests/snapshots/baseline.openapi.json create mode 100644 tests/unit/__init__.py create mode 100644 tests/unit/test_filter_sanitization.py create mode 100644 tests/unit/test_hook_image.py create mode 100644 tests/unit/test_srt_generation.py create mode 100644 tests/unit/test_tracking.py create mode 100644 tests/unit/test_translate_languages.py diff --git a/.gitignore b/.gitignore index f2df51cd..adeda0d3 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,11 @@ output/ # Cache dirs .cache/ .config/ +.pytest_cache/ + +# Test ephemera (baseline.openapi.json IS committed; current is not) +tests/snapshots/current.openapi.json +tests/fixtures/smoke.mp4 # Multi-agent Skills .agents/ .agent/ diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 
00000000..cfb2e22d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,12 @@ +[tool.pytest.ini_options] +testpaths = ["tests"] +asyncio_mode = "auto" +markers = [ + "e2e: end-to-end smoke test that needs real ffmpeg and a fixture video", + "api: API contract tests via FastAPI TestClient", + "unit: pure-Python unit tests", +] +filterwarnings = [ + "ignore::DeprecationWarning", + "ignore::PendingDeprecationWarning", +] diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 00000000..874f21fb --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,13 @@ +pytest>=8.0 +pytest-asyncio>=0.23 +pytest-mock>=3.12 +httpx>=0.28 +respx>=0.21 +Pillow>=10.0 +fastapi>=0.110 +python-multipart>=0.0.9 +pydantic>=2.0 +python-dotenv>=1.0 +numpy>=1.26 +boto3>=1.34 +beautifulsoup4>=4.12 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/api/__init__.py b/tests/api/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/api/test_openapi_contract.py b/tests/api/test_openapi_contract.py new file mode 100644 index 00000000..8b7c547b --- /dev/null +++ b/tests/api/test_openapi_contract.py @@ -0,0 +1,121 @@ +""" +API contract test: snapshot the FastAPI openapi.json. + +This is the single most important regression check for the restructure. +The flat layout's openapi dump is captured into +`tests/snapshots/baseline.openapi.json` on the first run. After the +restructure, the test diffs the current dump against the baseline — +any route renamed, dropped, or with a changed schema fails loudly. + +To intentionally regenerate the baseline (e.g. after we restructure +and accept the new shape), delete the snapshot file and re-run. 
+""" +import json +import os +from pathlib import Path + +import pytest + +SNAPSHOTS_DIR = Path(__file__).resolve().parent.parent / "snapshots" +BASELINE = SNAPSHOTS_DIR / "baseline.openapi.json" +CURRENT = SNAPSHOTS_DIR / "current.openapi.json" + + +@pytest.fixture +def app_client(tmp_path, monkeypatch): + """ + Build a TestClient against the production FastAPI app. + + Imports `app` lazily inside the fixture so the conftest sys.modules + stubs are in place first. app.py creates `uploads/` and `output/` + at import time — we redirect cwd into a tmp dir so we don't leave + artifacts in the repo. + """ + (tmp_path / "uploads").mkdir(exist_ok=True) + (tmp_path / "output").mkdir(exist_ok=True) + monkeypatch.chdir(tmp_path) + + from fastapi.testclient import TestClient + import app as app_module # noqa: WPS433 intentional late import + + with TestClient(app_module.app) as client: + yield client + + +def _dump_openapi(client) -> dict: + """Fetch /openapi.json and return it as a dict (dict equality already ignores key order).""" + resp = client.get("/openapi.json") + assert resp.status_code == 200 + return resp.json() + + +def test_openapi_dump_matches_baseline(app_client): + SNAPSHOTS_DIR.mkdir(parents=True, exist_ok=True) + current = _dump_openapi(app_client) + CURRENT.write_text(json.dumps(current, indent=2, sort_keys=True), encoding="utf-8") + + if not BASELINE.exists(): + # First run: capture the baseline so subsequent runs have something + # to diff against. The test is reported as skipped, not failed (this is intentional — the user + # is locking in current behavior). + BASELINE.write_text( + json.dumps(current, indent=2, sort_keys=True), encoding="utf-8" + ) + pytest.skip( + f"Wrote initial baseline to {BASELINE}. Commit it to lock the contract." 
+ ) + + baseline = json.loads(BASELINE.read_text(encoding="utf-8")) + if current != baseline: + pytest.fail( + "openapi.json drifted from baseline.\n" + f"Diff written to {CURRENT}.\n" + f"If the change is intentional, delete {BASELINE} and re-run " + "to regenerate." + ) + + +# --- Targeted route checks that don't need any external mocks ---------- + + +def test_translate_languages_endpoint_returns_dict(app_client): + """The endpoint must expose the canonical language codes somewhere + in its response — directly as keys, or nested under a common wrapper + (e.g. {"languages": {...}}). We just need 'en', 'es', 'fr' to show up.""" + r = app_client.get("/api/translate/languages") + assert r.status_code == 200 + body = r.json() + + def _flatten_keys(obj, out): + if isinstance(obj, dict): + out.update(obj.keys()) + for v in obj.values(): + _flatten_keys(v, out) + elif isinstance(obj, list): + for v in obj: + _flatten_keys(v, out) + elif isinstance(obj, str): + out.add(obj) + + found: set[str] = set() + _flatten_keys(body, found) + assert {"en", "es", "fr"} <= found, f"missing core language codes in {body!r}" + + +def test_status_for_unknown_job_returns_4xx(app_client): + r = app_client.get("/api/status/this-job-does-not-exist") + assert r.status_code in (400, 404, 422) + + +def test_app_serves_openapi_json(app_client): + r = app_client.get("/openapi.json") + assert r.status_code == 200 + body = r.json() + assert "paths" in body + assert len(body["paths"]) > 0 + + +def test_app_serves_docs(app_client): + r = app_client.get("/docs") + assert r.status_code == 200 + assert "swagger" in r.text.lower() or "openapi" in r.text.lower() diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..fe22235b --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,143 @@ +""" +Shared pytest fixtures + heavy-module stubbing. 
+ +The production code imports `mediapipe`, `ultralytics`, `torch`, `cv2`, +`scenedetect`, `yt_dlp`, `google.genai`, and `faster_whisper` at module +load time. None of those are available in the test environment by +default — and several can't even install on every Python version. + +To keep the safety net runnable on a stock laptop, we stub these out +in `sys.modules` BEFORE any production module is imported. The classes +under test (SmoothedCameraman, SpeakerTracker, _sanitize_filter_string, +generate_srt, create_hook_image, SUPPORTED_LANGUAGES) are pure Python +and never touch the stubbed surfaces, so the tests still exercise real +production logic. +""" +import os +import sys +from pathlib import Path +from unittest.mock import MagicMock + +# Make the repo root importable so `import main`, `import editor`, etc work. +REPO_ROOT = Path(__file__).resolve().parent.parent +if str(REPO_ROOT) not in sys.path: + sys.path.insert(0, str(REPO_ROOT)) + + +# --- Heavy module stubbing ----------------------------------------------- + +def _stub(name): + """Register a MagicMock for a module name if it is not already importable.""" + if name in sys.modules: + return + try: + __import__(name) + except Exception: + sys.modules[name] = MagicMock(name=name) + + +# Mock ML / video deps that are heavy or platform-restricted. +# Order matters for submodules: parent first, then attributes. +for _m in ( + "cv2", + "scenedetect", + "scenedetect.detectors", + "ultralytics", + "torch", + "torchvision", + "mediapipe", + "yt_dlp", + "tqdm", + "faster_whisper", + "google", + "google.genai", + "google.genai.types", + "google.genai.errors", + "google.protobuf", +): + _stub(_m) + +# scenedetect's `open_video`, `SceneManager`, and `ContentDetector` are +# imported by name from main.py. Wire them onto the mock so the import +# doesn't ImportError. 
+if isinstance(sys.modules.get("scenedetect"), MagicMock): + sys.modules["scenedetect"].open_video = MagicMock(name="open_video") + sys.modules["scenedetect"].SceneManager = MagicMock(name="SceneManager") +if isinstance(sys.modules.get("scenedetect.detectors"), MagicMock): + sys.modules["scenedetect.detectors"].ContentDetector = MagicMock( + name="ContentDetector" + ) + +# ultralytics.YOLO('yolov8n.pt') runs at main.py import time. Make it +# return a harmless MagicMock instead of trying to download weights. +if isinstance(sys.modules.get("ultralytics"), MagicMock): + sys.modules["ultralytics"].YOLO = MagicMock(name="YOLO") + +# mediapipe.solutions.face_detection.FaceDetection(...) runs at import. +if isinstance(sys.modules.get("mediapipe"), MagicMock): + mp_mock = sys.modules["mediapipe"] + mp_mock.solutions = MagicMock() + mp_mock.solutions.face_detection = MagicMock() + mp_mock.solutions.face_detection.FaceDetection = MagicMock() + +# google.genai is referenced as `from google import genai`. Make the +# `genai` attribute return our stub. +if isinstance(sys.modules.get("google"), MagicMock): + sys.modules["google"].genai = sys.modules.get("google.genai", MagicMock()) + +# Make boto3 importable even without it installed (s3_uploader uses it +# at module load via `import boto3`). It's tiny so dev-requirements +# pulls it in, but stub as a safety net. 
+_stub("boto3") +_stub("botocore") +_stub("botocore.exceptions") +if isinstance(sys.modules.get("botocore.exceptions"), MagicMock): + sys.modules["botocore.exceptions"].ClientError = type( + "ClientError", (Exception,), {} + ) + + +# --- Fixtures ------------------------------------------------------------ + +import pytest # noqa: E402 (after sys.modules stubbing) + + +@pytest.fixture +def tmp_output_dir(tmp_path, monkeypatch): + """Per-test temporary OUTPUT_DIR / UPLOAD_DIR pair.""" + output = tmp_path / "output" + uploads = tmp_path / "uploads" + output.mkdir() + uploads.mkdir() + monkeypatch.chdir(tmp_path) + yield output, uploads + + +@pytest.fixture +def fake_transcript(): + """A synthetic faster-whisper transcript with word-level timing. + + Shape matches what main.transcribe_video returns: + {"text": str, "language": str, + "segments": [{"start": float, "end": float, "text": str, + "words": [{"start": float, "end": float, "word": str}, ...]}]} + """ + return { + "text": "Hello world this is a test", + "language": "en", + "segments": [ + { + "start": 0.0, + "end": 6.0, + "text": "Hello world this is a test", + "words": [ + {"start": 0.0, "end": 0.5, "word": "Hello"}, + {"start": 0.6, "end": 1.1, "word": "world"}, + {"start": 1.2, "end": 1.5, "word": "this"}, + {"start": 1.6, "end": 1.8, "word": "is"}, + {"start": 1.9, "end": 2.0, "word": "a"}, + {"start": 2.1, "end": 2.6, "word": "test"}, + ], + } + ], + } diff --git a/tests/e2e/__init__.py b/tests/e2e/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/e2e/test_pipeline_smoke.py b/tests/e2e/test_pipeline_smoke.py new file mode 100644 index 00000000..c9b46e05 --- /dev/null +++ b/tests/e2e/test_pipeline_smoke.py @@ -0,0 +1,108 @@ +""" +End-to-end pipeline smoke test. + +Runs the real video-to-shorts pipeline against a tiny local +fixture (tests/fixtures/smoke.mp4, git-ignored). 
Requires: + - ffmpeg on PATH + - The fixture file present (otherwise the test is SKIPPED) + - All real Python deps installed (mediapipe, ultralytics, etc.) + +This test is marked `e2e` and is excluded from the default `pytest -m +"not e2e"` run. It is the slowest test and the most production-faithful +one: it proves the pipeline still produces a valid vertical clip after +the restructure. + +To run only this: + pytest -m e2e +""" +import json +import os +import shutil +import subprocess +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parent.parent.parent +FIXTURE = REPO_ROOT / "tests" / "fixtures" / "smoke.mp4" + + +pytestmark = pytest.mark.e2e + + +def _ffmpeg_available() -> bool: + return shutil.which("ffmpeg") is not None and shutil.which("ffprobe") is not None + + +def _ffprobe(path: Path) -> dict: + out = subprocess.check_output([ + "ffprobe", "-v", "error", + "-show_entries", "stream=width,height,codec_type:format=duration", + "-of", "json", str(path), + ]) + return json.loads(out) + + +@pytest.fixture +def real_modules_available(): + """Skip the e2e test if production deps aren't actually installed. + + The unit/api tests run with sys.modules stubs from conftest.py. The + e2e test needs the REAL packages — if any are still stubs, bail. + """ + import sys + from unittest.mock import MagicMock + + required = ["cv2", "mediapipe", "ultralytics", "yt_dlp", "scenedetect"] + for mod in required: + installed = sys.modules.get(mod) + if installed is None or isinstance(installed, MagicMock): + pytest.skip(f"e2e needs real {mod} installed (got stub or missing)") + + +def test_pipeline_produces_vertical_clip(real_modules_available, tmp_path): + if not _ffmpeg_available(): + pytest.skip("ffmpeg/ffprobe not on PATH") + if not FIXTURE.exists(): + pytest.skip( + f"missing fixture {FIXTURE} — see tests/fixtures/README.md for " + "how to generate one" + ) + + # Run the existing main.py orchestrator directly. 
This is the same + # entrypoint the FastAPI subprocess worker invokes for real jobs. + output_dir = tmp_path / "output" + output_dir.mkdir() + + proc = subprocess.run( + [ + "python", str(REPO_ROOT / "main.py"), + "-i", str(FIXTURE), + "-o", str(output_dir), + ], + capture_output=True, text=True, timeout=600, + env={**os.environ, "PYTHONPATH": str(REPO_ROOT)}, + ) + assert proc.returncode == 0, ( + f"main.py exited non-zero.\nSTDOUT:\n{proc.stdout}\nSTDERR:\n{proc.stderr}" + ) + + # At least one clip MP4 was produced. + clips = list(output_dir.glob("*_clip_*.mp4")) + assert clips, f"no clips in {output_dir}\nSTDOUT:\n{proc.stdout}" + + # The first clip must be a valid MP4 that's vertical (9:16 ratio). + probe = _ffprobe(clips[0]) + streams = probe.get("streams", []) + video = next(s for s in streams if s.get("codec_type") == "video") + w, h = video["width"], video["height"] + aspect = w / h + # 9/16 = 0.5625. Accept 0.5-0.625 (about +/-11%) to allow for encoder padding. + assert 0.5 < aspect < 0.625, f"expected ~9:16, got {w}x{h} (aspect {aspect:.3f})" + + # Metadata JSON exists alongside the clips. + meta_files = list(output_dir.glob("*_metadata.json")) + assert meta_files, f"no metadata json produced in {output_dir}" + meta = json.loads(meta_files[0].read_text(encoding="utf-8")) + assert "shorts" in meta + assert isinstance(meta["shorts"], list) diff --git a/tests/fixtures/README.md b/tests/fixtures/README.md new file mode 100644 index 00000000..9b5542b9 --- /dev/null +++ b/tests/fixtures/README.md @@ -0,0 +1,33 @@ +# Test fixtures + +Put small reusable test inputs here. + +## `smoke.mp4` + +The e2e pipeline smoke test (`tests/e2e/test_pipeline_smoke.py`) looks for a +file at `tests/fixtures/smoke.mp4`. + +If the file is missing, the e2e test is **skipped** (not failed) so a plain +`pytest` run stays green on machines where the fixture is absent. 
+ +Requirements for the fixture: + +- Roughly 5 seconds long +- Landscape (16:9) source so the vertical reframing actually has to crop +- Contains at least one detectable face for the speaker tracker +- Has an audio track +- Small file (<= 2 MB so it can be committed) +- Creative-commons or self-recorded so it can be redistributed + +To generate a quick synthetic one with ffmpeg: + +```bash +ffmpeg -y -f lavfi -i testsrc2=size=1280x720:rate=30:duration=5 \ + -f lavfi -i sine=frequency=440:duration=5 \ + -c:v libx264 -preset fast -crf 28 -c:a aac \ + tests/fixtures/smoke.mp4 +``` + +That file has no face though, so the speaker tracker will hit the YOLO +fallback. For a more representative fixture, trim a 5-second clip from any +talking-head video you own. diff --git a/tests/snapshots/.gitkeep b/tests/snapshots/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/tests/snapshots/baseline.openapi.json b/tests/snapshots/baseline.openapi.json new file mode 100644 index 00000000..b9424deb --- /dev/null +++ b/tests/snapshots/baseline.openapi.json @@ -0,0 +1,2174 @@ +{ + "components": { + "schemas": { + "Body_process_endpoint_api_process_post": { + "properties": { + "acknowledged": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Acknowledged" + }, + "file": { + "anyOf": [ + { + "format": "binary", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File" + }, + "url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Url" + } + }, + "title": "Body_process_endpoint_api_process_post", + "type": "object" + }, + "Body_saasshorts_actor_upload_api_saasshorts_actor_upload_post": { + "properties": { + "file": { + "format": "binary", + "title": "File", + "type": "string" + } + }, + "required": [ + "file" + ], + "title": "Body_saasshorts_actor_upload_api_saasshorts_actor_upload_post", + "type": "object" + }, + "Body_thumbnail_analyze_api_thumbnail_analyze_post": { + "properties": { + 
"file": { + "anyOf": [ + { + "format": "binary", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File" + }, + "session_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Session Id" + }, + "url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Url" + } + }, + "title": "Body_thumbnail_analyze_api_thumbnail_analyze_post", + "type": "object" + }, + "Body_thumbnail_generate_api_thumbnail_generate_post": { + "properties": { + "background": { + "anyOf": [ + { + "format": "binary", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Background" + }, + "count": { + "default": 3, + "title": "Count", + "type": "integer" + }, + "extra_prompt": { + "default": "", + "title": "Extra Prompt", + "type": "string" + }, + "face": { + "anyOf": [ + { + "format": "binary", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Face" + }, + "session_id": { + "title": "Session Id", + "type": "string" + }, + "title": { + "title": "Title", + "type": "string" + } + }, + "required": [ + "session_id", + "title" + ], + "title": "Body_thumbnail_generate_api_thumbnail_generate_post", + "type": "object" + }, + "Body_thumbnail_publish_api_thumbnail_publish_post": { + "properties": { + "api_key": { + "title": "Api Key", + "type": "string" + }, + "description": { + "title": "Description", + "type": "string" + }, + "session_id": { + "title": "Session Id", + "type": "string" + }, + "thumbnail_url": { + "title": "Thumbnail Url", + "type": "string" + }, + "title": { + "title": "Title", + "type": "string" + }, + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "session_id", + "title", + "description", + "thumbnail_url", + "api_key", + "user_id" + ], + "title": "Body_thumbnail_publish_api_thumbnail_publish_post", + "type": "object" + }, + "Body_thumbnail_upload_api_thumbnail_upload_post": { + "properties": { + "file": { + "anyOf": [ + { + "format": "binary", 
+ "type": "string" + }, + { + "type": "null" + } + ], + "title": "File" + }, + "url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Url" + } + }, + "title": "Body_thumbnail_upload_api_thumbnail_upload_post", + "type": "object" + }, + "EditRequest": { + "properties": { + "api_key": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Api Key" + }, + "clip_index": { + "title": "Clip Index", + "type": "integer" + }, + "input_filename": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Input Filename" + }, + "job_id": { + "title": "Job Id", + "type": "string" + } + }, + "required": [ + "job_id", + "clip_index" + ], + "title": "EditRequest", + "type": "object" + }, + "EffectsGenerateRequest": { + "properties": { + "clip_index": { + "title": "Clip Index", + "type": "integer" + }, + "input_filename": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Input Filename" + }, + "job_id": { + "title": "Job Id", + "type": "string" + } + }, + "required": [ + "job_id", + "clip_index" + ], + "title": "EffectsGenerateRequest", + "type": "object" + }, + "HTTPValidationError": { + "properties": { + "detail": { + "items": { + "$ref": "#/components/schemas/ValidationError" + }, + "title": "Detail", + "type": "array" + } + }, + "title": "HTTPValidationError", + "type": "object" + }, + "HookRequest": { + "properties": { + "clip_index": { + "title": "Clip Index", + "type": "integer" + }, + "input_filename": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Input Filename" + }, + "job_id": { + "title": "Job Id", + "type": "string" + }, + "position": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": "top", + "title": "Position" + }, + "size": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": "M", + "title": "Size" + }, + "text": { + "title": "Text", 
+ "type": "string" + } + }, + "required": [ + "job_id", + "clip_index", + "text" + ], + "title": "HookRequest", + "type": "object" + }, + "SaaSActorRequest": { + "properties": { + "actor_description": { + "title": "Actor Description", + "type": "string" + }, + "num_options": { + "default": 3, + "title": "Num Options", + "type": "integer" + }, + "product_description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Product Description" + } + }, + "required": [ + "actor_description" + ], + "title": "SaaSActorRequest", + "type": "object" + }, + "SaaSAnalyzeRequest": { + "properties": { + "actor_gender": { + "default": "female", + "title": "Actor Gender", + "type": "string" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + }, + "language": { + "default": "en", + "title": "Language", + "type": "string" + }, + "num_scripts": { + "default": 3, + "title": "Num Scripts", + "type": "integer" + }, + "style": { + "default": "ugc", + "title": "Style", + "type": "string" + }, + "url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Url" + } + }, + "title": "SaaSAnalyzeRequest", + "type": "object" + }, + "SaaSGenerateRequest": { + "properties": { + "actor_description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Actor Description" + }, + "retry_job_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Retry Job Id" + }, + "script": { + "additionalProperties": true, + "title": "Script", + "type": "object" + }, + "selected_actor_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Selected Actor Url" + }, + "video_mode": { + "default": "lowcost", + "title": "Video Mode", + "type": "string" + }, + "voice_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Voice Id" + } + }, + "required": [ 
+ "script" + ], + "title": "SaaSGenerateRequest", + "type": "object" + }, + "SaaSPostRequest": { + "properties": { + "api_key": { + "title": "Api Key", + "type": "string" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + }, + "job_id": { + "title": "Job Id", + "type": "string" + }, + "platforms": { + "items": { + "type": "string" + }, + "title": "Platforms", + "type": "array" + }, + "scheduled_date": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Scheduled Date" + }, + "timezone": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": "UTC", + "title": "Timezone" + }, + "title": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Title" + }, + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "job_id", + "api_key", + "user_id", + "platforms" + ], + "title": "SaaSPostRequest", + "type": "object" + }, + "SocialPostRequest": { + "properties": { + "api_key": { + "title": "Api Key", + "type": "string" + }, + "clip_index": { + "title": "Clip Index", + "type": "integer" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + }, + "job_id": { + "title": "Job Id", + "type": "string" + }, + "platforms": { + "items": { + "type": "string" + }, + "title": "Platforms", + "type": "array" + }, + "scheduled_date": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Scheduled Date" + }, + "timezone": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": "UTC", + "title": "Timezone" + }, + "title": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Title" + }, + "user_id": { + "title": "User Id", + "type": "string" + } + }, + "required": [ + "job_id", + "clip_index", + "api_key", + "user_id", + "platforms" + ], + 
"title": "SocialPostRequest", + "type": "object" + }, + "SubtitleRequest": { + "properties": { + "bg_color": { + "default": "#000000", + "title": "Bg Color", + "type": "string" + }, + "bg_opacity": { + "default": 0.0, + "title": "Bg Opacity", + "type": "number" + }, + "border_color": { + "default": "#000000", + "title": "Border Color", + "type": "string" + }, + "border_width": { + "default": 2, + "title": "Border Width", + "type": "integer" + }, + "clip_index": { + "title": "Clip Index", + "type": "integer" + }, + "font_color": { + "default": "#FFFFFF", + "title": "Font Color", + "type": "string" + }, + "font_name": { + "default": "Verdana", + "title": "Font Name", + "type": "string" + }, + "font_size": { + "default": 16, + "title": "Font Size", + "type": "integer" + }, + "input_filename": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Input Filename" + }, + "job_id": { + "title": "Job Id", + "type": "string" + }, + "position": { + "default": "bottom", + "title": "Position", + "type": "string" + } + }, + "required": [ + "job_id", + "clip_index" + ], + "title": "SubtitleRequest", + "type": "object" + }, + "ThumbnailDescribeRequest": { + "properties": { + "session_id": { + "title": "Session Id", + "type": "string" + }, + "title": { + "title": "Title", + "type": "string" + } + }, + "required": [ + "session_id", + "title" + ], + "title": "ThumbnailDescribeRequest", + "type": "object" + }, + "ThumbnailTitlesRequest": { + "properties": { + "message": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Message" + }, + "session_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Session Id" + }, + "title": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Title" + } + }, + "title": "ThumbnailTitlesRequest", + "type": "object" + }, + "TranslateRequest": { + "properties": { + "clip_index": { + "title": "Clip Index", + "type": 
"integer" + }, + "input_filename": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Input Filename" + }, + "job_id": { + "title": "Job Id", + "type": "string" + }, + "source_language": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Source Language" + }, + "target_language": { + "title": "Target Language", + "type": "string" + } + }, + "required": [ + "job_id", + "clip_index", + "target_language" + ], + "title": "TranslateRequest", + "type": "object" + }, + "ValidationError": { + "properties": { + "ctx": { + "title": "Context", + "type": "object" + }, + "input": { + "title": "Input" + }, + "loc": { + "items": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "integer" + } + ] + }, + "title": "Location", + "type": "array" + }, + "msg": { + "title": "Message", + "type": "string" + }, + "type": { + "title": "Error Type", + "type": "string" + } + }, + "required": [ + "loc", + "msg", + "type" + ], + "title": "ValidationError", + "type": "object" + } + } + }, + "info": { + "title": "FastAPI", + "version": "0.1.0" + }, + "openapi": "3.1.0", + "paths": { + "/api/clip/{job_id}/{clip_index}/transcript": { + "get": { + "description": "Return word-level captions for a specific clip, formatted for Remotion.", + "operationId": "get_clip_transcript_api_clip__job_id___clip_index__transcript_get", + "parameters": [ + { + "in": "path", + "name": "job_id", + "required": true, + "schema": { + "title": "Job Id", + "type": "string" + } + }, + { + "in": "path", + "name": "clip_index", + "required": true, + "schema": { + "title": "Clip Index", + "type": "integer" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Get Clip 
Transcript" + } + }, + "/api/config": { + "get": { + "operationId": "get_config_api_config_get", + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + } + }, + "summary": "Get Config" + } + }, + "/api/edit": { + "post": { + "operationId": "edit_clip_api_edit_post", + "parameters": [ + { + "in": "header", + "name": "X-Gemini-Key", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "X-Gemini-Key" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EditRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Edit Clip" + } + }, + "/api/effects/generate": { + "post": { + "description": "Generate structured EffectsConfig JSON for Remotion rendering via Gemini AI.", + "operationId": "generate_effects_config_api_effects_generate_post", + "parameters": [ + { + "in": "header", + "name": "X-Gemini-Key", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "X-Gemini-Key" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EffectsGenerateRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Generate Effects Config" + } + }, + 
"/api/hook": { + "post": { + "operationId": "add_hook_api_hook_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HookRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Add Hook" + } + }, + "/api/process": { + "post": { + "operationId": "process_endpoint_api_process_post", + "requestBody": { + "content": { + "multipart/form-data": { + "schema": { + "$ref": "#/components/schemas/Body_process_endpoint_api_process_post" + } + } + } + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Process Endpoint" + } + }, + "/api/render": { + "post": { + "description": "Proxy render requests to the Node.js Remotion render service.", + "operationId": "proxy_render_api_render_post", + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + } + }, + "summary": "Proxy Render" + } + }, + "/api/render/{render_id}": { + "get": { + "description": "Proxy render status polling to the Node.js Remotion render service.", + "operationId": "proxy_render_status_api_render__render_id__get", + "parameters": [ + { + "in": "path", + "name": "render_id", + "required": true, + "schema": { + "title": "Render Id", + "type": "string" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { 
+ "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Proxy Render Status" + } + }, + "/api/saasshorts/actor-gallery": { + "get": { + "description": "List all previously generated actor images from public S3.", + "operationId": "saasshorts_actor_gallery_api_saasshorts_actor_gallery_get", + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + } + }, + "summary": "Saasshorts Actor Gallery" + } + }, + "/api/saasshorts/actor-options": { + "post": { + "description": "Generate multiple actor image options for the user to choose from.", + "operationId": "saasshorts_actor_options_api_saasshorts_actor_options_post", + "parameters": [ + { + "in": "header", + "name": "X-Fal-Key", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "X-Fal-Key" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SaaSActorRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Saasshorts Actor Options" + } + }, + "/api/saasshorts/actor-upload": { + "post": { + "description": "Upload a custom actor image (stored locally only, not S3).", + "operationId": "saasshorts_actor_upload_api_saasshorts_actor_upload_post", + "requestBody": { + "content": { + "multipart/form-data": { + "schema": { + "$ref": "#/components/schemas/Body_saasshorts_actor_upload_api_saasshorts_actor_upload_post" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + 
"application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Saasshorts Actor Upload" + } + }, + "/api/saasshorts/analyze": { + "post": { + "description": "Analyze a URL or manual description and generate video scripts.", + "operationId": "saasshorts_analyze_api_saasshorts_analyze_post", + "parameters": [ + { + "in": "header", + "name": "X-Gemini-Key", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "X-Gemini-Key" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SaaSAnalyzeRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Saasshorts Analyze" + } + }, + "/api/saasshorts/gallery": { + "get": { + "description": "List all UGC videos from the public gallery.", + "operationId": "saasshorts_video_gallery_api_saasshorts_gallery_get", + "parameters": [ + { + "in": "query", + "name": "limit", + "required": false, + "schema": { + "default": 50, + "title": "Limit", + "type": "integer" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Saasshorts Video Gallery" + } + }, + "/api/saasshorts/generate": { + "post": { + "description": 
"Generate a SaaS UGC video from a script. Returns a job_id for polling.", + "operationId": "saasshorts_generate_api_saasshorts_generate_post", + "parameters": [ + { + "in": "header", + "name": "X-Fal-Key", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "X-Fal-Key" + } + }, + { + "in": "header", + "name": "X-ElevenLabs-Key", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "X-Elevenlabs-Key" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SaaSGenerateRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Saasshorts Generate" + } + }, + "/api/saasshorts/post": { + "post": { + "description": "Post an AI Shorts video to social media via Upload-Post.", + "operationId": "saasshorts_post_to_socials_api_saasshorts_post_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SaaSPostRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Saasshorts Post To Socials" + } + }, + "/api/saasshorts/status/{job_id}": { + "get": { + "description": "Poll SaaSShorts job status.", + "operationId": "saasshorts_status_api_saasshorts_status__job_id__get", + "parameters": [ + { + "in": "path", + "name": "job_id", + 
"required": true, + "schema": { + "title": "Job Id", + "type": "string" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Saasshorts Status" + } + }, + "/api/saasshorts/voices": { + "get": { + "description": "List available ElevenLabs voices.", + "operationId": "saasshorts_voices_api_saasshorts_voices_get", + "parameters": [ + { + "in": "header", + "name": "X-ElevenLabs-Key", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "X-Elevenlabs-Key" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Saasshorts Voices" + } + }, + "/api/social/post": { + "post": { + "operationId": "post_to_socials_api_social_post_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SocialPostRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Post To Socials" + } + }, + "/api/social/user": { + "get": { + "description": "Proxy to fetch user ID from Upload-Post", + "operationId": "get_social_user_api_social_user_get", + "parameters": [ + { + "in": "header", + "name": "X-Upload-Post-Key", + 
"required": true, + "schema": { + "title": "X-Upload-Post-Key", + "type": "string" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Get Social User" + } + }, + "/api/status/{job_id}": { + "get": { + "operationId": "get_status_api_status__job_id__get", + "parameters": [ + { + "in": "path", + "name": "job_id", + "required": true, + "schema": { + "title": "Job Id", + "type": "string" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Get Status" + } + }, + "/api/subtitle": { + "post": { + "operationId": "add_subtitles_api_subtitle_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SubtitleRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Add Subtitles" + } + }, + "/api/thumbnail/analyze": { + "post": { + "description": "Analyze a video and suggest viral YouTube titles.", + "operationId": "thumbnail_analyze_api_thumbnail_analyze_post", + "parameters": [ + { + "in": "header", + "name": "X-Gemini-Key", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "X-Gemini-Key" + } 
+ } + ], + "requestBody": { + "content": { + "multipart/form-data": { + "schema": { + "$ref": "#/components/schemas/Body_thumbnail_analyze_api_thumbnail_analyze_post" + } + } + } + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Thumbnail Analyze" + } + }, + "/api/thumbnail/describe": { + "post": { + "description": "Generate a YouTube description with chapters from the transcript.", + "operationId": "thumbnail_describe_api_thumbnail_describe_post", + "parameters": [ + { + "in": "header", + "name": "X-Gemini-Key", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "X-Gemini-Key" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ThumbnailDescribeRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Thumbnail Describe" + } + }, + "/api/thumbnail/generate": { + "post": { + "description": "Generate YouTube thumbnails with Gemini image generation.", + "operationId": "thumbnail_generate_api_thumbnail_generate_post", + "parameters": [ + { + "in": "header", + "name": "X-Gemini-Key", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "X-Gemini-Key" + } + } + ], + "requestBody": { + "content": { + "multipart/form-data": { + "schema": { + "$ref": 
"#/components/schemas/Body_thumbnail_generate_api_thumbnail_generate_post" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Thumbnail Generate" + } + }, + "/api/thumbnail/publish": { + "post": { + "description": "Kick off a background upload to YouTube via Upload-Post and return immediately.", + "operationId": "thumbnail_publish_api_thumbnail_publish_post", + "requestBody": { + "content": { + "application/x-www-form-urlencoded": { + "schema": { + "$ref": "#/components/schemas/Body_thumbnail_publish_api_thumbnail_publish_post" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Thumbnail Publish" + } + }, + "/api/thumbnail/publish/status/{publish_id}": { + "get": { + "description": "Poll the status of a background publish job.", + "operationId": "thumbnail_publish_status_api_thumbnail_publish_status__publish_id__get", + "parameters": [ + { + "in": "path", + "name": "publish_id", + "required": true, + "schema": { + "title": "Publish Id", + "type": "string" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Thumbnail Publish Status" + } + }, + "/api/thumbnail/titles": { + 
"post": { + "description": "Refine title suggestions or accept a manual title.", + "operationId": "thumbnail_titles_api_thumbnail_titles_post", + "parameters": [ + { + "in": "header", + "name": "X-Gemini-Key", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "X-Gemini-Key" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ThumbnailTitlesRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Thumbnail Titles" + } + }, + "/api/thumbnail/upload": { + "post": { + "description": "Upload video and start background Whisper transcription immediately.", + "operationId": "thumbnail_upload_api_thumbnail_upload_post", + "requestBody": { + "content": { + "multipart/form-data": { + "schema": { + "$ref": "#/components/schemas/Body_thumbnail_upload_api_thumbnail_upload_post" + } + } + } + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Thumbnail Upload" + } + }, + "/api/translate": { + "post": { + "description": "Translate a video clip to a different language using ElevenLabs dubbing.", + "operationId": "translate_clip_api_translate_post", + "parameters": [ + { + "in": "header", + "name": "X-ElevenLabs-Key", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "X-Elevenlabs-Key" + } + } + ], + 
"requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/TranslateRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Translate Clip" + } + }, + "/api/translate/languages": { + "get": { + "description": "Return supported languages for translation.", + "operationId": "get_languages_api_translate_languages_get", + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + } + }, + "summary": "Get Languages" + } + }, + "/gallery": { + "get": { + "description": "SEO gallery page with all generated UGC videos.", + "operationId": "gallery_html_page_gallery_get", + "responses": { + "200": { + "content": { + "text/html": { + "schema": { + "type": "string" + } + } + }, + "description": "Successful Response" + } + }, + "summary": "Gallery Html Page" + } + }, + "/video/{video_id}": { + "get": { + "description": "SEO individual video page with og:video meta tags.", + "operationId": "video_html_page_video__video_id__get", + "parameters": [ + { + "in": "path", + "name": "video_id", + "required": true, + "schema": { + "title": "Video Id", + "type": "string" + } + } + ], + "responses": { + "200": { + "content": { + "text/html": { + "schema": { + "type": "string" + } + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Video Html Page" + } + } + } +} \ No newline at end of file diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 
00000000..e69de29b diff --git a/tests/unit/test_filter_sanitization.py b/tests/unit/test_filter_sanitization.py new file mode 100644 index 00000000..60e9e4ae --- /dev/null +++ b/tests/unit/test_filter_sanitization.py @@ -0,0 +1,87 @@ +""" +Characterization tests for VideoEditor._sanitize_filter_string and +_enforce_zoompan_output_size in editor.py. + +Both will move into openshorts/utils/filters.py in Phase 1 (made +shared so the motion-graphics and audio compositors can reuse them). +These tests lock in the conversions FFmpeg builds depend on. +""" +import pytest + +from editor import VideoEditor + + +# --- _sanitize_filter_string -------------------------------------------- + + +@pytest.mark.parametrize( + "raw, expected", + [ + # Simple comparisons. + ("t<3", "lt(t,3)"), + ("t<=3", "lte(t,3)"), + ("on>3", "gt(on,3)"), + ("on>=75", "gte(on,75)"), + # Decimals. + ("t<3.5", "lt(t,3.5)"), + ("t>=2.25", "gte(t,2.25)"), + # Negative numbers. + ("x<-5", "lt(x,-5)"), + # Whitespace tolerance. + ("t < 3", "lt(t,3)"), + ("on >= 75", "gte(on,75)"), + # Already function-form should be left alone. + ("lt(t,3)", "lt(t,3)"), + ("gte(on,75)", "gte(on,75)"), + # Nothing to sanitize. + ("eq=brightness=0.1", "eq=brightness=0.1"), + ], +) +def test_sanitize_converts_comparison_operators(raw, expected): + assert VideoEditor._sanitize_filter_string(raw) == expected + + +def test_sanitize_handles_multiple_operators_in_one_string(): + raw = "enable='between(t,1,3)':eval=frame:expr='if(t<2,0.5,1)'" + out = VideoEditor._sanitize_filter_string(raw) + assert "lt(t,2)" in out + # The `between(t,1,3)` token has no `<` / `>` so it survives. 
+ assert "between(t,1,3)" in out + + +def test_sanitize_does_not_touch_unrelated_arithmetic(): + raw = "scale=w=iw/2:h=ih/2" + assert VideoEditor._sanitize_filter_string(raw) == raw + + +# --- _enforce_zoompan_output_size --------------------------------------- + + +def test_enforce_replaces_existing_size(): + raw = "zoompan=z='zoom+0.001':d=125:s=640x480" + out = VideoEditor._enforce_zoompan_output_size(raw, 1920, 1080) + assert ":s=1920x1080" in out + assert ":s=640x480" not in out + + +def test_enforce_appends_size_if_missing(): + raw = "zoompan=z='zoom+0.001':d=125" + out = VideoEditor._enforce_zoompan_output_size(raw, 1280, 720) + assert out.endswith(":s=1280x720") + + +def test_enforce_leaves_non_zoompan_filters_untouched(): + raw = "eq=brightness=0.1:contrast=1.2" + assert VideoEditor._enforce_zoompan_output_size(raw, 1920, 1080) == raw + + +def test_enforce_processes_each_filter_in_a_chain(): + # Two zoompans in the same chain — both should get the size enforced. + raw = "zoompan=z='1.1':d=10,format=yuv420p,zoompan=z='1.2':d=10:s=640x360" + out = VideoEditor._enforce_zoompan_output_size(raw, 1920, 1080) + parts = out.split(",") + zoompans = [p for p in parts if p.startswith("zoompan=")] + assert len(zoompans) == 2 + assert all(":s=1920x1080" in zp for zp in zoompans) + # Non-zoompan stays. + assert "format=yuv420p" in parts diff --git a/tests/unit/test_hook_image.py b/tests/unit/test_hook_image.py new file mode 100644 index 00000000..ca359cf6 --- /dev/null +++ b/tests/unit/test_hook_image.py @@ -0,0 +1,88 @@ +""" +Characterization tests for hooks.create_hook_image. + +Targets the real PIL rendering pipeline — no mocks. Uses the +committed fonts/NotoSerif-Bold.ttf so no font download happens at +test time. + +Asserts structural properties (file written, valid PNG, RGBA, has +visible pixels) rather than pixel-perfect content (too brittle). 
+""" +import os +import pytest +from PIL import Image + +from hooks import create_hook_image + + +def test_create_hook_image_writes_a_png(tmp_path): + out = tmp_path / "hook.png" + create_hook_image("Hello world", target_width=1080, output_image_path=str(out)) + assert out.exists() + assert out.stat().st_size > 0 + + +def test_created_image_is_rgba_with_visible_pixels(tmp_path): + out = tmp_path / "hook.png" + create_hook_image("Did you know?", target_width=1080, output_image_path=str(out)) + with Image.open(out) as img: + assert img.mode == "RGBA" + alpha = img.split()[-1] + visible_pixels = sum(1 for px in alpha.getdata() if px > 0) + assert visible_pixels > 0 + + +def test_image_width_is_bounded_by_target_width(tmp_path): + """ + `target_width` is the MAX width the card can occupy — for short + text the image will be narrower (just text + padding + shadow), + but it must never exceed target_width by more than the shadow + margin (~10 px on each side). + """ + out = tmp_path / "hook.png" + target = 1080 + create_hook_image("Hello", target_width=target, output_image_path=str(out)) + with Image.open(out) as img: + assert img.width > 0 + assert img.width <= target + 50 # shadow margin + + +def test_longer_text_pushes_width_up_to_target(tmp_path): + short = tmp_path / "short.png" + long_ = tmp_path / "long.png" + target = 1080 + create_hook_image("Hi", target_width=target, output_image_path=str(short)) + create_hook_image( + "This is a much longer hook that should fill the available width.", + target_width=target, output_image_path=str(long_), + ) + with Image.open(short) as a, Image.open(long_) as b: + assert b.width > a.width + + +def test_longer_text_produces_taller_image(tmp_path): + short_path = tmp_path / "short.png" + long_path = tmp_path / "long.png" + create_hook_image("Hi", target_width=1080, output_image_path=str(short_path)) + create_hook_image( + "This is a much longer hook that should wrap onto multiple lines because it " + "exceeds the pixel-based wrap 
width by a comfortable margin.", + target_width=1080, + output_image_path=str(long_path), + ) + with Image.open(short_path) as a, Image.open(long_path) as b: + assert b.height > a.height + + +def test_font_scale_increases_image_size(tmp_path): + small = tmp_path / "small.png" + big = tmp_path / "big.png" + create_hook_image("Same text", target_width=1080, output_image_path=str(small), + font_scale=1.0) + create_hook_image("Same text", target_width=1080, output_image_path=str(big), + font_scale=2.0) + with Image.open(small) as a, Image.open(big) as b: + # At 2x font scale, the image must be visibly bigger in at least one dimension. + assert (b.width >= a.width and b.height > a.height) or ( + b.height >= a.height and b.width > a.width + ) diff --git a/tests/unit/test_srt_generation.py b/tests/unit/test_srt_generation.py new file mode 100644 index 00000000..34242568 --- /dev/null +++ b/tests/unit/test_srt_generation.py @@ -0,0 +1,123 @@ +""" +Characterization tests for SRT generation in subtitles.py. + +Targets: generate_srt, format_srt_block, hex_to_ass_color. +These move to openshorts/overlays/subtitles_generate.py in Phase 1. 
+""" +import re +import pytest + +from subtitles import format_srt_block, generate_srt, hex_to_ass_color + + +# --- format_srt_block --------------------------------------------------- + + +def test_format_srt_block_produces_well_formed_block(): + block = format_srt_block(1, 0.0, 1.5, "Hello world") + lines = block.strip().splitlines() + assert lines[0] == "1" + assert re.match(r"^\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}$", lines[1]) + assert lines[2] == "Hello world" + + +def test_format_srt_block_time_formatting(): + block = format_srt_block(2, 3661.5, 3662.001, "X") + # 3661.5s = 1:01:01,500; 3662.001s = 1:01:02,001 + assert "01:01:01,500 --> 01:01:02,001" in block + + +def test_format_srt_block_pads_zeros_for_short_times(): + block = format_srt_block(3, 0.0, 0.123, "X") + assert "00:00:00,000 --> 00:00:00,123" in block + + +# --- generate_srt ------------------------------------------------------- + + +def test_generate_srt_creates_file_with_expected_word_groups(fake_transcript, tmp_path): + out = tmp_path / "out.srt" + ok = generate_srt( + fake_transcript, clip_start=0.0, clip_end=10.0, + output_path=str(out), max_chars=20, max_duration=2.0, + ) + assert ok is True + content = out.read_text(encoding="utf-8") + # All six words make it in. + for word in ("Hello", "world", "this", "is", "a", "test"): + assert word in content + # At least one well-formed block (index + arrow line + text + blank). 
+ assert re.search( + r"^\d+\n\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}\n.+", + content, re.MULTILINE, + ) + + +def test_generate_srt_returns_false_when_clip_has_no_words(fake_transcript, tmp_path): + out = tmp_path / "empty.srt" + ok = generate_srt( + fake_transcript, clip_start=100.0, clip_end=200.0, + output_path=str(out), max_chars=20, max_duration=2.0, + ) + assert ok is False + assert not out.exists() + + +def test_generate_srt_times_are_relative_to_clip_start(fake_transcript, tmp_path): + """ + Times in the SRT file must be measured from clip_start, not the + absolute video timeline. With clip_start=1.5 the first word with + `end > 1.5` is "is" (start=1.6, end=1.8) → block starts at 0.100s. + """ + out = tmp_path / "rel.srt" + generate_srt( + fake_transcript, clip_start=1.5, clip_end=3.0, + output_path=str(out), max_chars=200, max_duration=10.0, + ) + content = out.read_text(encoding="utf-8") + assert "00:00:00,100" in content + # Sanity: the absolute timestamp 1.6s does NOT appear. + assert "00:00:01,600" not in content + + +def test_generate_srt_respects_max_chars(fake_transcript, tmp_path): + out = tmp_path / "short.srt" + generate_srt( + fake_transcript, clip_start=0.0, clip_end=10.0, + output_path=str(out), max_chars=5, max_duration=10.0, + ) + content = out.read_text(encoding="utf-8") + # With max_chars=5, every text line must be <=5 chars after strip. + # Pull out text lines (the ones that aren't index or arrow). + arrow_re = re.compile(r"^\d{2}:\d{2}:\d{2},\d{3} --> ") + for line in content.splitlines(): + if not line or line.isdigit() or arrow_re.match(line): + continue + assert len(line) <= 5, f"line {line!r} exceeded max_chars" + + +# --- hex_to_ass_color --------------------------------------------------- + + +def test_hex_to_ass_color_pure_red_full_opacity(): + # ASS order is &HAABBGGRR. Red (#FF0000) → AA=00, BB=00, GG=00, RR=FF. 
+ assert hex_to_ass_color("#FF0000", opacity=1.0) == "&H000000FF" + + +def test_hex_to_ass_color_pure_white_full_opacity(): + assert hex_to_ass_color("#FFFFFF", opacity=1.0) == "&H00FFFFFF" + + +def test_hex_to_ass_color_partial_opacity(): + # opacity 0.5 → alpha 0x80 (128) + result = hex_to_ass_color("#000000", opacity=0.5) + assert result == "&H80000000" + + +def test_hex_to_ass_color_accepts_no_hash(): + assert hex_to_ass_color("FF0000", opacity=1.0) == "&H000000FF" + + +def test_hex_to_ass_color_falls_back_to_white_for_bad_input(): + # Malformed → fallback to FFFFFF (white). + assert hex_to_ass_color("XYZ", opacity=1.0) == "&H00FFFFFF" diff --git a/tests/unit/test_tracking.py b/tests/unit/test_tracking.py new file mode 100644 index 00000000..e706c486 --- /dev/null +++ b/tests/unit/test_tracking.py @@ -0,0 +1,175 @@ +""" +Characterization tests for SmoothedCameraman and SpeakerTracker. + +These classes live in main.py today and will move to +openshorts/video/tracking.py in Phase 1. The tests target the behaviors +that must be preserved across the move: + +- SmoothedCameraman: initial centering, safe-zone (no move when target + is inside it), slow vs fast pan speeds, force_snap, clamping to video + bounds. +- SpeakerTracker: no candidates → None, single candidate locks in, + switch cooldown prevents rapid re-targeting. +""" +import pytest + +from main import SmoothedCameraman, SpeakerTracker + + +# --- SmoothedCameraman --------------------------------------------------- + + +@pytest.fixture +def cameraman(): + # 9:16 output (1080x1920) cropped out of a 1920x1080 source. 
+ # crop_width = video_height * (9/16) = 1080 * 0.5625 = 607 + return SmoothedCameraman( + output_width=1080, output_height=1920, + video_width=1920, video_height=1080, + ) + + +def test_initial_center_is_video_midpoint(cameraman): + assert cameraman.current_center_x == 960 # 1920 / 2 + assert cameraman.target_center_x == 960 + + +def test_crop_dimensions_for_landscape_source(cameraman): + # crop_height = video_height; crop_width = video_height * 9/16 + assert cameraman.crop_height == 1080 + assert cameraman.crop_width == int(1080 * (9 / 16)) + + +def test_safe_zone_radius_is_quarter_of_crop_width(cameraman): + assert cameraman.safe_zone_radius == cameraman.crop_width * 0.25 + + +def test_update_target_with_face_box_sets_center(cameraman): + cameraman.update_target((400, 100, 200, 200)) # x, y, w, h + assert cameraman.target_center_x == 500 # 400 + 200/2 + + +def test_update_target_with_none_leaves_target_unchanged(cameraman): + cameraman.target_center_x = 750 + cameraman.update_target(None) + assert cameraman.target_center_x == 750 + + +def test_force_snap_jumps_directly_to_target(cameraman): + cameraman.target_center_x = 1500 + cameraman.get_crop_box(force_snap=True) + assert cameraman.current_center_x == 1500 + + +def test_camera_does_not_move_when_target_within_safe_zone(cameraman): + # Target 50 px away — well inside the ~152 px safe zone. + cameraman.target_center_x = 960 + 50 + cameraman.get_crop_box(force_snap=False) + assert cameraman.current_center_x == 960 # unchanged + + +def test_camera_moves_slowly_outside_safe_zone(cameraman): + # 200 px away — outside safe zone (~152) but not "huge" (>303 = crop_width/2). + cameraman.target_center_x = 960 + 200 + cameraman.get_crop_box(force_snap=False) + # Slow pan = 3 px/frame + assert cameraman.current_center_x == pytest.approx(963.0) + + +def test_camera_moves_fast_when_distance_is_huge(cameraman): + # 600 px away — > crop_width/2 (303). Speed = 15 px/frame. 
+ cameraman.target_center_x = 960 + 600 + cameraman.get_crop_box(force_snap=False) + assert cameraman.current_center_x == pytest.approx(975.0) + + +def test_get_crop_box_returns_a_9_16_window(cameraman): + x1, y1, x2, y2 = cameraman.get_crop_box(force_snap=True) + # y span is full video height + assert (y1, y2) == (0, 1080) + # x span equals crop_width + assert (x2 - x1) == cameraman.crop_width + + +def test_camera_clamps_to_left_edge(cameraman): + cameraman.target_center_x = 0 + cameraman.get_crop_box(force_snap=True) + x1, _, x2, _ = cameraman.get_crop_box(force_snap=False) + assert x1 == 0 + assert x2 == cameraman.crop_width + + +def test_camera_clamps_to_right_edge(cameraman): + cameraman.target_center_x = 5000 + cameraman.get_crop_box(force_snap=True) + x1, _, x2, _ = cameraman.get_crop_box(force_snap=False) + assert x2 == cameraman.video_width + + +# --- SpeakerTracker ----------------------------------------------------- + + +@pytest.fixture +def tracker(): + return SpeakerTracker(stabilization_frames=15, cooldown_frames=30) + + +def _face(x, y, w, h, score=10000): + return {"box": [x, y, w, h], "score": score} + + +def test_no_candidates_returns_none(tracker): + assert tracker.get_target([], frame_number=0, width=1920) is None + + +def test_single_candidate_returns_its_box(tracker): + box = [100, 100, 200, 200] + out = tracker.get_target([_face(*box)], frame_number=0, width=1920) + assert out == box + assert tracker.active_speaker_id == 0 + + +def test_same_face_in_subsequent_frames_keeps_active_speaker(tracker): + box = [100, 100, 200, 200] + tracker.get_target([_face(*box)], frame_number=0, width=1920) + speaker_a = tracker.active_speaker_id + tracker.get_target([_face(*box)], frame_number=5, width=1920) + tracker.get_target([_face(*box)], frame_number=10, width=1920) + assert tracker.active_speaker_id == speaker_a + + +def test_new_dominant_speaker_does_not_switch_within_cooldown(tracker): + # Frame 0: speaker A enters. 
+ tracker.get_target([_face(100, 100, 200, 200, score=10000)], + frame_number=0, width=1920) + a_id = tracker.active_speaker_id + # Frame 10 (well inside cooldown=30): BOTH A and a much bigger + # speaker B are visible. The cooldown only protects against switching + # when the previous speaker is still on screen — if A disappeared, + # the tracker would correctly switch since there's no point holding + # an absent speaker. So we keep A on screen here. + tracker.get_target( + [ + _face(100, 100, 200, 200, score=10000), # A still there + _face(1500, 100, 400, 400, score=10_000_000), # B much bigger + ], + frame_number=10, width=1920, + ) + # Hysteresis + cooldown blocks the switch while A is still visible. + assert tracker.active_speaker_id == a_id + + +def test_speaker_switches_after_cooldown_when_a_new_face_dominates(tracker): + # Frame 0: speaker A. + tracker.get_target([_face(100, 100, 200, 200, score=10000)], + frame_number=0, width=1920) + a_id = tracker.active_speaker_id + # Frame 100 (well past cooldown=30): speaker B is the only face on + # screen. With no competing scores and no hysteresis bonus applicable + # (A is not present), B should take over. + out = tracker.get_target( + [_face(1500, 100, 400, 400, score=10_000_000)], + frame_number=100, width=1920, + ) + assert tracker.active_speaker_id != a_id + assert out == [1500, 100, 400, 400] diff --git a/tests/unit/test_translate_languages.py b/tests/unit/test_translate_languages.py new file mode 100644 index 00000000..a7ac107e --- /dev/null +++ b/tests/unit/test_translate_languages.py @@ -0,0 +1,40 @@ +""" +Characterization tests for translate.SUPPORTED_LANGUAGES and +get_supported_languages. + +Locks in the public surface so the restructure can't accidentally +drop or rename a language code. +""" +from translate import SUPPORTED_LANGUAGES, get_supported_languages + + +# A minimal canonical set we want the API to keep advertising. 
+_EXPECTED_CORE = { + "en", "es", "fr", "de", "it", "pt", "pl", "hi", "ja", "ko", + "zh", "ar", "ru", "tr", "nl", "sv", "id", "vi", "th", +} + + +def test_supported_languages_includes_core_codes(): + missing = _EXPECTED_CORE - set(SUPPORTED_LANGUAGES.keys()) + assert not missing, f"SUPPORTED_LANGUAGES dropped: {missing}" + + +def test_supported_languages_values_are_human_names(): + assert SUPPORTED_LANGUAGES["en"] == "English" + assert SUPPORTED_LANGUAGES["es"] == "Spanish" + assert SUPPORTED_LANGUAGES["zh"] == "Chinese" + + +def test_get_supported_languages_returns_a_copy(): + result = get_supported_languages() + assert result == SUPPORTED_LANGUAGES + # Mutating the copy must not poison the global. + result["xx"] = "TestLang" + assert "xx" not in SUPPORTED_LANGUAGES + + +def test_supported_languages_has_no_empty_values(): + for code, name in SUPPORTED_LANGUAGES.items(): + assert code, "empty language code" + assert isinstance(name, str) and name.strip(), f"empty name for {code}" From d7c5a5895e798f27f20b37b01b02ad13d18a854e Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse Date: Tue, 19 May 2026 15:44:16 -0400 Subject: [PATCH 02/43] chore(restructure): scaffold empty openshorts/ package + extend pyproject MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1 step 0: create the package skeleton with __init__.py docstrings in every target folder so subsequent moves have a destination. No code moves yet — tests stay 62/62 green. Extends pyproject.toml with [build-system] + [project] + setuptools package discovery so `pip install -e .` exposes the new package. requires-python pinned to >=3.9 to match the local dev venv (Docker still uses 3.11). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 5 +++++ openshorts/__init__.py | 1 + openshorts/audio/__init__.py | 1 + openshorts/core/__init__.py | 1 + openshorts/editing/__init__.py | 1 + openshorts/ingest/__init__.py | 1 + openshorts/integrations/__init__.py | 1 + openshorts/layouts/__init__.py | 1 + openshorts/ml/__init__.py | 1 + openshorts/models/__init__.py | 1 + openshorts/motion_graphics/__init__.py | 1 + openshorts/motion_graphics/library/__init__.py | 1 + openshorts/overlays/__init__.py | 1 + openshorts/prompts/__init__.py | 1 + openshorts/routes/__init__.py | 1 + openshorts/saas/__init__.py | 1 + openshorts/thumbnails/__init__.py | 1 + openshorts/utils/__init__.py | 1 + openshorts/video/__init__.py | 1 + pyproject.toml | 15 +++++++++++++++ 20 files changed, 38 insertions(+) create mode 100644 openshorts/__init__.py create mode 100644 openshorts/audio/__init__.py create mode 100644 openshorts/core/__init__.py create mode 100644 openshorts/editing/__init__.py create mode 100644 openshorts/ingest/__init__.py create mode 100644 openshorts/integrations/__init__.py create mode 100644 openshorts/layouts/__init__.py create mode 100644 openshorts/ml/__init__.py create mode 100644 openshorts/models/__init__.py create mode 100644 openshorts/motion_graphics/__init__.py create mode 100644 openshorts/motion_graphics/library/__init__.py create mode 100644 openshorts/overlays/__init__.py create mode 100644 openshorts/prompts/__init__.py create mode 100644 openshorts/routes/__init__.py create mode 100644 openshorts/saas/__init__.py create mode 100644 openshorts/thumbnails/__init__.py create mode 100644 openshorts/utils/__init__.py create mode 100644 openshorts/video/__init__.py diff --git a/.gitignore b/.gitignore index adeda0d3..57f2857b 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,11 @@ __pycache__/ *.pyc +# Editable-install / build artifacts +*.egg-info/ +build/ +dist/ + # Temporary files / runtime dirs temp_* uploads/ diff --git 
a/openshorts/__init__.py b/openshorts/__init__.py new file mode 100644 index 00000000..74e86d91 --- /dev/null +++ b/openshorts/__init__.py @@ -0,0 +1 @@ +"""OpenShorts: AI vertical short-video generator package root.""" diff --git a/openshorts/audio/__init__.py b/openshorts/audio/__init__.py new file mode 100644 index 00000000..3b721818 --- /dev/null +++ b/openshorts/audio/__init__.py @@ -0,0 +1 @@ +"""Audio mixing, soundtrack library, SFX cue generation (future feature A).""" diff --git a/openshorts/core/__init__.py b/openshorts/core/__init__.py new file mode 100644 index 00000000..306833c1 --- /dev/null +++ b/openshorts/core/__init__.py @@ -0,0 +1 @@ +"""Cross-cutting infrastructure: job queue, job store, api keys, logging.""" diff --git a/openshorts/editing/__init__.py b/openshorts/editing/__init__.py new file mode 100644 index 00000000..eed8d61f --- /dev/null +++ b/openshorts/editing/__init__.py @@ -0,0 +1 @@ +"""AI-generated FFmpeg filter pipeline and prompt templates.""" diff --git a/openshorts/ingest/__init__.py b/openshorts/ingest/__init__.py new file mode 100644 index 00000000..0f70dbc2 --- /dev/null +++ b/openshorts/ingest/__init__.py @@ -0,0 +1 @@ +"""Source ingestion: YouTube downloads and local file uploads.""" diff --git a/openshorts/integrations/__init__.py b/openshorts/integrations/__init__.py new file mode 100644 index 00000000..70e279e3 --- /dev/null +++ b/openshorts/integrations/__init__.py @@ -0,0 +1 @@ +"""External-service clients: S3, ElevenLabs, fal.ai, Upload-Post, Remotion, Gemini.""" diff --git a/openshorts/layouts/__init__.py b/openshorts/layouts/__init__.py new file mode 100644 index 00000000..d998f5c5 --- /dev/null +++ b/openshorts/layouts/__init__.py @@ -0,0 +1 @@ +"""Vertical-clip layout templates (future feature B): panorama, educational, etc.""" diff --git a/openshorts/ml/__init__.py b/openshorts/ml/__init__.py new file mode 100644 index 00000000..5004a94d --- /dev/null +++ b/openshorts/ml/__init__.py @@ -0,0 +1 @@ +"""AI / 
inference: face/person detection, transcription, Gemini clients.""" diff --git a/openshorts/models/__init__.py b/openshorts/models/__init__.py new file mode 100644 index 00000000..166facac --- /dev/null +++ b/openshorts/models/__init__.py @@ -0,0 +1 @@ +"""Pydantic request/response schemas grouped by API domain.""" diff --git a/openshorts/motion_graphics/__init__.py b/openshorts/motion_graphics/__init__.py new file mode 100644 index 00000000..111bf5ec --- /dev/null +++ b/openshorts/motion_graphics/__init__.py @@ -0,0 +1 @@ +"""Motion graphics overlays (future feature C): effects + compositor.""" diff --git a/openshorts/motion_graphics/library/__init__.py b/openshorts/motion_graphics/library/__init__.py new file mode 100644 index 00000000..24eaafb8 --- /dev/null +++ b/openshorts/motion_graphics/library/__init__.py @@ -0,0 +1 @@ +"""Motion-graphic effect templates registered with the compositor.""" diff --git a/openshorts/overlays/__init__.py b/openshorts/overlays/__init__.py new file mode 100644 index 00000000..70d64f6d --- /dev/null +++ b/openshorts/overlays/__init__.py @@ -0,0 +1 @@ +"""Text hook overlays and subtitle generation / burn-in.""" diff --git a/openshorts/prompts/__init__.py b/openshorts/prompts/__init__.py new file mode 100644 index 00000000..57ccda2f --- /dev/null +++ b/openshorts/prompts/__init__.py @@ -0,0 +1 @@ +"""Externalized Gemini prompt templates (one .md file per prompt).""" diff --git a/openshorts/routes/__init__.py b/openshorts/routes/__init__.py new file mode 100644 index 00000000..a275b378 --- /dev/null +++ b/openshorts/routes/__init__.py @@ -0,0 +1 @@ +"""FastAPI routers, one module per API domain.""" diff --git a/openshorts/saas/__init__.py b/openshorts/saas/__init__.py new file mode 100644 index 00000000..2fd86630 --- /dev/null +++ b/openshorts/saas/__init__.py @@ -0,0 +1 @@ +"""SaaS UGC pipeline: research, scripting, media generation, compositing.""" diff --git a/openshorts/thumbnails/__init__.py b/openshorts/thumbnails/__init__.py 
new file mode 100644 index 00000000..47440627 --- /dev/null +++ b/openshorts/thumbnails/__init__.py @@ -0,0 +1 @@ +"""YouTube thumbnail workflow: titles, images, descriptions.""" diff --git a/openshorts/utils/__init__.py b/openshorts/utils/__init__.py new file mode 100644 index 00000000..54702a27 --- /dev/null +++ b/openshorts/utils/__init__.py @@ -0,0 +1 @@ +"""Shared helpers: ffmpeg filter sanitization, path utilities.""" diff --git a/openshorts/video/__init__.py b/openshorts/video/__init__.py new file mode 100644 index 00000000..6bf44010 --- /dev/null +++ b/openshorts/video/__init__.py @@ -0,0 +1 @@ +"""Core video processing: scene analysis, tracking, reframing, ffmpeg wrapper.""" diff --git a/pyproject.toml b/pyproject.toml index cfb2e22d..4f740495 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,18 @@ +[build-system] +requires = ["setuptools>=68", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "openshorts" +version = "0.1.0" +description = "AI vertical short-video generator" +requires-python = ">=3.9" + +[tool.setuptools.packages.find] +where = ["."] +include = ["openshorts*"] +exclude = ["tests*", "dashboard*", "remotion*", "render-service*"] + [tool.pytest.ini_options] testpaths = ["tests"] asyncio_mode = "auto" From ec44d4bbd80ce65df0e856ad2fc04276036dbbad Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse Date: Tue, 19 May 2026 15:45:54 -0400 Subject: [PATCH 03/43] chore(restructure): move s3_uploader -> openshorts/integrations/s3.py Phase 1 step 1: relocate the leaf module with the fewest reverse imports (only app.py imports from s3_uploader). Adds a re-export shim at the old path so existing `from s3_uploader import ...` keeps working through the restructure. Tests stay 62/62 green. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- openshorts/integrations/s3.py | 445 ++++++++++++++++++++++++++++++++ s3_uploader.py | 465 ++-------------------------------- 2 files changed, 464 insertions(+), 446 deletions(-) create mode 100644 openshorts/integrations/s3.py diff --git a/openshorts/integrations/s3.py b/openshorts/integrations/s3.py new file mode 100644 index 00000000..71320425 --- /dev/null +++ b/openshorts/integrations/s3.py @@ -0,0 +1,445 @@ +"""AWS S3 client: clip uploads, actor gallery, UGC video gallery, presigned URLs.""" +import os +from dotenv import load_dotenv +load_dotenv() +import boto3 +from botocore.exceptions import ClientError +import logging + +# Configure silent logging for boto3 and botocore +logging.getLogger('boto3').setLevel(logging.CRITICAL) +logging.getLogger('botocore').setLevel(logging.CRITICAL) +logging.getLogger('s3transfer').setLevel(logging.CRITICAL) + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +def upload_file_to_s3(file_path, bucket_name, s3_key): + """ + Upload a file to an S3 bucket silently. + """ + access_key = os.environ.get('AWS_ACCESS_KEY_ID') + secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY') + region = os.environ.get('AWS_REGION', 'eu-west-3') + + if not access_key or not secret_key: + return False + + s3_client = boto3.client( + 's3', + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + region_name=region + ) + try: + # Extra arguments for public read if needed, but the user didn't specify. + # Given the bucket name, it might be for a web app. 
+ s3_client.upload_file(file_path, bucket_name, s3_key) + return True + except ClientError: + return False + except Exception: + return False + + +from botocore.config import Config +import json +import time as time_module + +# Simple in-memory cache for gallery clips +_clips_cache = { + "data": None, + "timestamp": 0 +} +CACHE_TTL_SECONDS = 300 # 5 minutes + +def get_s3_client(): + """Returns an authenticated S3 client.""" + access_key = os.environ.get('AWS_ACCESS_KEY_ID') + secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY') + region = os.environ.get('AWS_REGION', 'eu-west-3') + + if not access_key or not secret_key: + return None + + return boto3.client( + 's3', + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + region_name=region, + config=Config(signature_version='s3v4') + ) + +def generate_presigned_url(bucket_name, object_key, expiration=3600): + """Generate a presigned URL to share an S3 object.""" + s3_client = get_s3_client() + if not s3_client: + return None + try: + response = s3_client.generate_presigned_url('get_object', + Params={'Bucket': bucket_name, + 'Key': object_key}, + ExpiresIn=expiration) + return response + except ClientError as e: + logger.error(e) + return None + +def list_all_clips(bucket_name=None, limit=50, force_refresh=False): + """ + List recent clips from the S3 bucket by finding metadata files. + Returns a list of dicts containing clip info and signed URLs. 
+ + Args: + bucket_name: S3 bucket name (defaults to AWS_S3_BUCKET env var) + limit: Maximum number of clips to return (default 50 for speed) + force_refresh: If True, bypass cache + """ + global _clips_cache + + # Check cache first + now = time_module.time() + if not force_refresh and _clips_cache["data"] is not None: + if now - _clips_cache["timestamp"] < CACHE_TTL_SECONDS: + cached = _clips_cache["data"] + return cached[:limit] if limit else cached + + if not bucket_name: + bucket_name = os.environ.get('AWS_S3_BUCKET', 'my-clips-bucket') + + s3_client = get_s3_client() + if not s3_client: + return [] + + all_clips = [] + + try: + # List all objects in bucket + # Note: For very large buckets, pagination is needed. + # Assuming reasonable size for now, but adding continuation token support is best practice. + paginator = s3_client.get_paginator('list_objects_v2') + pages = paginator.paginate(Bucket=bucket_name) + + metadata_files = [] + for page in pages: + if 'Contents' in page: + for obj in page['Contents']: + if obj['Key'].endswith('_metadata.json'): + metadata_files.append(obj) + + # Sort metadata by LastModified (newest first) + metadata_files.sort(key=lambda x: x['LastModified'], reverse=True) + + for meta_obj in metadata_files: + key = meta_obj['Key'] + # key format: {job_id}/..._metadata.json + + # Read metadata content + try: + obj_resp = s3_client.get_object(Bucket=bucket_name, Key=key) + content = obj_resp['Body'].read().decode('utf-8') + data = json.loads(content) + + parts = key.split('/') + job_id = parts[0] if len(parts) > 1 else "unknown" + # Filename base for clips in same folder + # Meta key: "job_id/filename_metadata.json" + # Base name in metadata usually matches filename without ext + meta_filename = os.path.basename(key) + base_name = meta_filename.replace('_metadata.json', '') + + clips_data = data.get('shorts', []) + + for i, clip in enumerate(clips_data): + clip_filename = f"{base_name}_clip_{i+1}.mp4" + clip_key = 
f"{job_id}/{clip_filename}" + + # Generate signed URL + signed_url = generate_presigned_url(bucket_name, clip_key, expiration=7200) # 2 hours + + if signed_url: + all_clips.append({ + "job_id": job_id, + "index": i, + "url": signed_url, + "title": clip.get('video_title_for_youtube_short', 'Untitled Clip'), + "tiktok_desc": clip.get('video_description_for_tiktok', ''), + "insta_desc": clip.get('video_description_for_instagram', ''), + "created_at": meta_obj['LastModified'].isoformat(), + "duration": clip.get('end', 0) - clip.get('start', 0) + }) + + # Early exit if we have enough clips + if limit and len(all_clips) >= limit: + break + + # Early exit if we have enough clips + if limit and len(all_clips) >= limit: + break + + except Exception as e: + logger.error(f"Error processing metadata {key}: {e}") + continue + + except Exception as e: + logger.error(f"Error listing bucket: {e}") + return [] + + # Update cache with full results (keep for pagination later) + _clips_cache["data"] = all_clips + _clips_cache["timestamp"] = now + + return all_clips[:limit] if limit else all_clips + +def upload_actor_to_s3(file_path, description=""): + """ + Upload an actor image to the public S3 bucket. + Returns the public URL or None on failure. 
+ """ + bucket_name = os.environ.get('AWS_S3_PUBLIC_BUCKET', 'my-public-bucket') + region = os.environ.get('AWS_REGION', 'eu-west-3') + + s3_client = get_s3_client() + if not s3_client: + return None + + import uuid + unique_id = str(uuid.uuid4())[:8] + filename = os.path.basename(file_path) + name, ext = os.path.splitext(filename) + s3_key = f"avatars/{name}_{unique_id}{ext}" + + try: + # Skip broken/tiny files + if os.path.getsize(file_path) < 1000: + logger.warning(f"Skipping tiny file ({os.path.getsize(file_path)} bytes): {file_path}") + return None + + s3_client.upload_file( + file_path, bucket_name, s3_key, + ExtraArgs={'ContentType': 'image/png'}, + ) + public_url = f"https://{bucket_name}.s3.{region}.amazonaws.com/{s3_key}" + + # Save metadata JSON alongside the image + if description: + import datetime + meta_key = s3_key.rsplit('.', 1)[0] + '.json' + meta = json.dumps({ + "description": description, + "url": public_url, + "created_at": datetime.datetime.utcnow().isoformat() + "Z", + }, ensure_ascii=False) + s3_client.put_object( + Bucket=bucket_name, Key=meta_key, + Body=meta.encode('utf-8'), + ContentType='application/json', + ) + + logger.info(f"Uploaded actor to S3: {public_url}") + return public_url + except Exception as e: + logger.error(f"Failed to upload actor to S3: {e}") + return None + + +def list_actor_gallery(): + """ + List all actor images from the public S3 bucket. + Returns list with URLs and descriptions, newest first. 
+ """ + bucket_name = os.environ.get('AWS_S3_PUBLIC_BUCKET', 'my-public-bucket') + region = os.environ.get('AWS_REGION', 'eu-west-3') + + s3_client = get_s3_client() + if not s3_client: + return [] + + try: + paginator = s3_client.get_paginator('list_objects_v2') + pages = paginator.paginate(Bucket=bucket_name, Prefix='avatars/') + + all_objects = {} + for page in pages: + for obj in page.get('Contents', []): + key = obj['Key'] + base = key.rsplit('.', 1)[0] + if base not in all_objects: + all_objects[base] = {} + if key.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')): + all_objects[base]['image'] = obj + elif key.endswith('.json'): + all_objects[base]['meta_key'] = key + + images = [] + for base, data in all_objects.items(): + if 'image' not in data: + continue + obj = data['image'] + key = obj['Key'] + public_url = f"https://{bucket_name}.s3.{region}.amazonaws.com/{key}" + entry = { + "url": public_url, + "key": key, + "created_at": obj['LastModified'].isoformat(), + "description": "", + } + # Try to read metadata JSON + if 'meta_key' in data: + try: + meta_resp = s3_client.get_object(Bucket=bucket_name, Key=data['meta_key']) + meta = json.loads(meta_resp['Body'].read().decode('utf-8')) + entry['description'] = meta.get('description', '') + except Exception: + pass + images.append(entry) + + images.sort(key=lambda x: x['created_at'], reverse=True) + return images + + except Exception as e: + logger.error(f"Failed to list actor gallery: {e}") + return [] + + +# ── SaaS Video Gallery (public S3) ────────────────────────────────── + +_video_gallery_cache = { + "data": None, + "timestamp": 0, +} + +def upload_video_to_gallery(video_path, actor_image_path, metadata, video_id=None): + """ + Upload a generated UGC video + actor + metadata to the public S3 bucket. + Returns dict with public URLs or None on failure. 
+ """ + import uuid + bucket_name = os.environ.get('AWS_S3_PUBLIC_BUCKET', 'my-public-bucket') + region = os.environ.get('AWS_REGION', 'eu-west-3') + + s3_client = get_s3_client() + if not s3_client: + return None + + if not video_id: + video_id = str(uuid.uuid4())[:8] + + base_url = f"https://{bucket_name}.s3.{region}.amazonaws.com" + results = {} + + try: + # Upload video + if os.path.exists(video_path): + s3_key = f"videos/{video_id}/video.mp4" + s3_client.upload_file(video_path, bucket_name, s3_key, + ExtraArgs={'ContentType': 'video/mp4'}) + results["video_url"] = f"{base_url}/{s3_key}" + + # Upload actor image + if actor_image_path and os.path.exists(actor_image_path): + s3_key = f"videos/{video_id}/actor.png" + s3_client.upload_file(actor_image_path, bucket_name, s3_key, + ExtraArgs={'ContentType': 'image/png'}) + results["actor_url"] = f"{base_url}/{s3_key}" + + # Build and upload metadata + import datetime + metadata["video_id"] = video_id + metadata["video_url"] = results.get("video_url", "") + metadata["actor_url"] = results.get("actor_url", "") + metadata["created_at"] = datetime.datetime.utcnow().isoformat() + "Z" + + meta_json = json.dumps(metadata, ensure_ascii=False, indent=2) + s3_key = f"videos/{video_id}/metadata.json" + s3_client.put_object( + Bucket=bucket_name, Key=s3_key, + Body=meta_json.encode('utf-8'), + ContentType='application/json', + ) + results["metadata_url"] = f"{base_url}/{s3_key}" + results["video_id"] = video_id + + logger.info(f"Uploaded video gallery: {video_id}") + + # Invalidate cache + _video_gallery_cache["data"] = None + + return results + + except Exception as e: + logger.error(f"Failed to upload video to gallery: {e}") + return None + + +def list_video_gallery(limit=50, force_refresh=False): + """ + List all UGC videos from the public S3 bucket. + Returns list of metadata dicts, newest first. 
+ """ + global _video_gallery_cache + + now = time_module.time() + if not force_refresh and _video_gallery_cache["data"] is not None: + if now - _video_gallery_cache["timestamp"] < CACHE_TTL_SECONDS: + cached = _video_gallery_cache["data"] + return cached[:limit] if limit else cached + + bucket_name = os.environ.get('AWS_S3_PUBLIC_BUCKET', 'my-public-bucket') + + s3_client = get_s3_client() + if not s3_client: + return [] + + videos = [] + + try: + paginator = s3_client.get_paginator('list_objects_v2') + pages = paginator.paginate(Bucket=bucket_name, Prefix='videos/') + + meta_files = [] + for page in pages: + for obj in page.get('Contents', []): + if obj['Key'].endswith('/metadata.json'): + meta_files.append(obj) + + # Newest first + meta_files.sort(key=lambda x: x['LastModified'], reverse=True) + + for meta_obj in meta_files: + try: + obj_resp = s3_client.get_object(Bucket=bucket_name, Key=meta_obj['Key']) + content = obj_resp['Body'].read().decode('utf-8') + data = json.loads(content) + videos.append(data) + if limit and len(videos) >= limit: + break + except Exception as e: + logger.error(f"Error reading metadata {meta_obj['Key']}: {e}") + continue + + except Exception as e: + logger.error(f"Failed to list video gallery: {e}") + return [] + + _video_gallery_cache["data"] = videos + _video_gallery_cache["timestamp"] = now + + return videos[:limit] if limit else videos + + +def upload_job_artifacts(directory, job_id): + """ + Upload all generated clips and metadata for a job to S3. 
+ """ + bucket_name = os.environ.get('AWS_S3_BUCKET', 'my-clips-bucket') + + if not os.path.exists(directory): + return + + for filename in os.listdir(directory): + # Upload .mp4 clips and the metadata JSON + if (filename.endswith(".mp4") or filename.endswith(".json")) and not filename.startswith("temp_"): + file_path = os.path.join(directory, filename) + s3_key = f"{job_id}/{filename}" + upload_file_to_s3(file_path, bucket_name, s3_key) diff --git a/s3_uploader.py b/s3_uploader.py index 18656c2e..59b46449 100644 --- a/s3_uploader.py +++ b/s3_uploader.py @@ -1,446 +1,19 @@ -import os -from dotenv import load_dotenv -load_dotenv() -import boto3 -from botocore.exceptions import ClientError -import logging - -# Configure silent logging for boto3 and botocore -logging.getLogger('boto3').setLevel(logging.CRITICAL) -logging.getLogger('botocore').setLevel(logging.CRITICAL) -logging.getLogger('s3transfer').setLevel(logging.CRITICAL) - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - -def upload_file_to_s3(file_path, bucket_name, s3_key): - """ - Upload a file to an S3 bucket silently. - """ - access_key = os.environ.get('AWS_ACCESS_KEY_ID') - secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY') - region = os.environ.get('AWS_REGION', 'eu-west-3') - - if not access_key or not secret_key: - return False - - s3_client = boto3.client( - 's3', - aws_access_key_id=access_key, - aws_secret_access_key=secret_key, - region_name=region - ) - try: - # Extra arguments for public read if needed, but the user didn't specify. - # Given the bucket name, it might be for a web app. 
- s3_client.upload_file(file_path, bucket_name, s3_key) - return True - except ClientError: - return False - except Exception: - return False - - -from botocore.config import Config -import json -import time as time_module - -# Simple in-memory cache for gallery clips -_clips_cache = { - "data": None, - "timestamp": 0 -} -CACHE_TTL_SECONDS = 300 # 5 minutes - -def get_s3_client(): - """Returns an authenticated S3 client.""" - access_key = os.environ.get('AWS_ACCESS_KEY_ID') - secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY') - region = os.environ.get('AWS_REGION', 'eu-west-3') - - if not access_key or not secret_key: - return None - - return boto3.client( - 's3', - aws_access_key_id=access_key, - aws_secret_access_key=secret_key, - region_name=region, - config=Config(signature_version='s3v4') - ) - -def generate_presigned_url(bucket_name, object_key, expiration=3600): - """Generate a presigned URL to share an S3 object.""" - s3_client = get_s3_client() - if not s3_client: - return None - try: - response = s3_client.generate_presigned_url('get_object', - Params={'Bucket': bucket_name, - 'Key': object_key}, - ExpiresIn=expiration) - return response - except ClientError as e: - logger.error(e) - return None - -def list_all_clips(bucket_name=None, limit=50, force_refresh=False): - """ - List recent clips from the S3 bucket by finding metadata files. - Returns a list of dicts containing clip info and signed URLs. 
- - Args: - bucket_name: S3 bucket name (defaults to AWS_S3_BUCKET env var) - limit: Maximum number of clips to return (default 50 for speed) - force_refresh: If True, bypass cache - """ - global _clips_cache - - # Check cache first - now = time_module.time() - if not force_refresh and _clips_cache["data"] is not None: - if now - _clips_cache["timestamp"] < CACHE_TTL_SECONDS: - cached = _clips_cache["data"] - return cached[:limit] if limit else cached - - if not bucket_name: - bucket_name = os.environ.get('AWS_S3_BUCKET', 'my-clips-bucket') - - s3_client = get_s3_client() - if not s3_client: - return [] - - all_clips = [] - - try: - # List all objects in bucket - # Note: For very large buckets, pagination is needed. - # Assuming reasonable size for now, but adding continuation token support is best practice. - paginator = s3_client.get_paginator('list_objects_v2') - pages = paginator.paginate(Bucket=bucket_name) - - metadata_files = [] - for page in pages: - if 'Contents' in page: - for obj in page['Contents']: - if obj['Key'].endswith('_metadata.json'): - metadata_files.append(obj) - - # Sort metadata by LastModified (newest first) - metadata_files.sort(key=lambda x: x['LastModified'], reverse=True) - - for meta_obj in metadata_files: - key = meta_obj['Key'] - # key format: {job_id}/..._metadata.json - - # Read metadata content - try: - obj_resp = s3_client.get_object(Bucket=bucket_name, Key=key) - content = obj_resp['Body'].read().decode('utf-8') - data = json.loads(content) - - parts = key.split('/') - job_id = parts[0] if len(parts) > 1 else "unknown" - # Filename base for clips in same folder - # Meta key: "job_id/filename_metadata.json" - # Base name in metadata usually matches filename without ext - meta_filename = os.path.basename(key) - base_name = meta_filename.replace('_metadata.json', '') - - clips_data = data.get('shorts', []) - - for i, clip in enumerate(clips_data): - clip_filename = f"{base_name}_clip_{i+1}.mp4" - clip_key = 
f"{job_id}/{clip_filename}" - - # Generate signed URL - signed_url = generate_presigned_url(bucket_name, clip_key, expiration=7200) # 2 hours - - if signed_url: - all_clips.append({ - "job_id": job_id, - "index": i, - "url": signed_url, - "title": clip.get('video_title_for_youtube_short', 'Untitled Clip'), - "tiktok_desc": clip.get('video_description_for_tiktok', ''), - "insta_desc": clip.get('video_description_for_instagram', ''), - "created_at": meta_obj['LastModified'].isoformat(), - "duration": clip.get('end', 0) - clip.get('start', 0) - }) - - # Early exit if we have enough clips - if limit and len(all_clips) >= limit: - break - - # Early exit if we have enough clips - if limit and len(all_clips) >= limit: - break - - except Exception as e: - logger.error(f"Error processing metadata {key}: {e}") - continue - - except Exception as e: - logger.error(f"Error listing bucket: {e}") - return [] - - # Update cache with full results (keep for pagination later) - _clips_cache["data"] = all_clips - _clips_cache["timestamp"] = now - - return all_clips[:limit] if limit else all_clips - -def upload_actor_to_s3(file_path, description=""): - """ - Upload an actor image to the public S3 bucket. - Returns the public URL or None on failure. 
- """ - bucket_name = os.environ.get('AWS_S3_PUBLIC_BUCKET', 'my-public-bucket') - region = os.environ.get('AWS_REGION', 'eu-west-3') - - s3_client = get_s3_client() - if not s3_client: - return None - - import uuid - unique_id = str(uuid.uuid4())[:8] - filename = os.path.basename(file_path) - name, ext = os.path.splitext(filename) - s3_key = f"avatars/{name}_{unique_id}{ext}" - - try: - # Skip broken/tiny files - if os.path.getsize(file_path) < 1000: - logger.warning(f"Skipping tiny file ({os.path.getsize(file_path)} bytes): {file_path}") - return None - - s3_client.upload_file( - file_path, bucket_name, s3_key, - ExtraArgs={'ContentType': 'image/png'}, - ) - public_url = f"https://{bucket_name}.s3.{region}.amazonaws.com/{s3_key}" - - # Save metadata JSON alongside the image - if description: - import datetime - meta_key = s3_key.rsplit('.', 1)[0] + '.json' - meta = json.dumps({ - "description": description, - "url": public_url, - "created_at": datetime.datetime.utcnow().isoformat() + "Z", - }, ensure_ascii=False) - s3_client.put_object( - Bucket=bucket_name, Key=meta_key, - Body=meta.encode('utf-8'), - ContentType='application/json', - ) - - logger.info(f"Uploaded actor to S3: {public_url}") - return public_url - except Exception as e: - logger.error(f"Failed to upload actor to S3: {e}") - return None - - -def list_actor_gallery(): - """ - List all actor images from the public S3 bucket. - Returns list with URLs and descriptions, newest first. 
- """ - bucket_name = os.environ.get('AWS_S3_PUBLIC_BUCKET', 'my-public-bucket') - region = os.environ.get('AWS_REGION', 'eu-west-3') - - s3_client = get_s3_client() - if not s3_client: - return [] - - try: - paginator = s3_client.get_paginator('list_objects_v2') - pages = paginator.paginate(Bucket=bucket_name, Prefix='avatars/') - - all_objects = {} - for page in pages: - for obj in page.get('Contents', []): - key = obj['Key'] - base = key.rsplit('.', 1)[0] - if base not in all_objects: - all_objects[base] = {} - if key.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')): - all_objects[base]['image'] = obj - elif key.endswith('.json'): - all_objects[base]['meta_key'] = key - - images = [] - for base, data in all_objects.items(): - if 'image' not in data: - continue - obj = data['image'] - key = obj['Key'] - public_url = f"https://{bucket_name}.s3.{region}.amazonaws.com/{key}" - entry = { - "url": public_url, - "key": key, - "created_at": obj['LastModified'].isoformat(), - "description": "", - } - # Try to read metadata JSON - if 'meta_key' in data: - try: - meta_resp = s3_client.get_object(Bucket=bucket_name, Key=data['meta_key']) - meta = json.loads(meta_resp['Body'].read().decode('utf-8')) - entry['description'] = meta.get('description', '') - except Exception: - pass - images.append(entry) - - images.sort(key=lambda x: x['created_at'], reverse=True) - return images - - except Exception as e: - logger.error(f"Failed to list actor gallery: {e}") - return [] - - -# ── SaaS Video Gallery (public S3) ────────────────────────────────── - -_video_gallery_cache = { - "data": None, - "timestamp": 0, -} - -def upload_video_to_gallery(video_path, actor_image_path, metadata, video_id=None): - """ - Upload a generated UGC video + actor + metadata to the public S3 bucket. - Returns dict with public URLs or None on failure. 
- """ - import uuid - bucket_name = os.environ.get('AWS_S3_PUBLIC_BUCKET', 'my-public-bucket') - region = os.environ.get('AWS_REGION', 'eu-west-3') - - s3_client = get_s3_client() - if not s3_client: - return None - - if not video_id: - video_id = str(uuid.uuid4())[:8] - - base_url = f"https://{bucket_name}.s3.{region}.amazonaws.com" - results = {} - - try: - # Upload video - if os.path.exists(video_path): - s3_key = f"videos/{video_id}/video.mp4" - s3_client.upload_file(video_path, bucket_name, s3_key, - ExtraArgs={'ContentType': 'video/mp4'}) - results["video_url"] = f"{base_url}/{s3_key}" - - # Upload actor image - if actor_image_path and os.path.exists(actor_image_path): - s3_key = f"videos/{video_id}/actor.png" - s3_client.upload_file(actor_image_path, bucket_name, s3_key, - ExtraArgs={'ContentType': 'image/png'}) - results["actor_url"] = f"{base_url}/{s3_key}" - - # Build and upload metadata - import datetime - metadata["video_id"] = video_id - metadata["video_url"] = results.get("video_url", "") - metadata["actor_url"] = results.get("actor_url", "") - metadata["created_at"] = datetime.datetime.utcnow().isoformat() + "Z" - - meta_json = json.dumps(metadata, ensure_ascii=False, indent=2) - s3_key = f"videos/{video_id}/metadata.json" - s3_client.put_object( - Bucket=bucket_name, Key=s3_key, - Body=meta_json.encode('utf-8'), - ContentType='application/json', - ) - results["metadata_url"] = f"{base_url}/{s3_key}" - results["video_id"] = video_id - - logger.info(f"Uploaded video gallery: {video_id}") - - # Invalidate cache - _video_gallery_cache["data"] = None - - return results - - except Exception as e: - logger.error(f"Failed to upload video to gallery: {e}") - return None - - -def list_video_gallery(limit=50, force_refresh=False): - """ - List all UGC videos from the public S3 bucket. - Returns list of metadata dicts, newest first. 
- """ - global _video_gallery_cache - - now = time_module.time() - if not force_refresh and _video_gallery_cache["data"] is not None: - if now - _video_gallery_cache["timestamp"] < CACHE_TTL_SECONDS: - cached = _video_gallery_cache["data"] - return cached[:limit] if limit else cached - - bucket_name = os.environ.get('AWS_S3_PUBLIC_BUCKET', 'my-public-bucket') - - s3_client = get_s3_client() - if not s3_client: - return [] - - videos = [] - - try: - paginator = s3_client.get_paginator('list_objects_v2') - pages = paginator.paginate(Bucket=bucket_name, Prefix='videos/') - - meta_files = [] - for page in pages: - for obj in page.get('Contents', []): - if obj['Key'].endswith('/metadata.json'): - meta_files.append(obj) - - # Newest first - meta_files.sort(key=lambda x: x['LastModified'], reverse=True) - - for meta_obj in meta_files: - try: - obj_resp = s3_client.get_object(Bucket=bucket_name, Key=meta_obj['Key']) - content = obj_resp['Body'].read().decode('utf-8') - data = json.loads(content) - videos.append(data) - if limit and len(videos) >= limit: - break - except Exception as e: - logger.error(f"Error reading metadata {meta_obj['Key']}: {e}") - continue - - except Exception as e: - logger.error(f"Failed to list video gallery: {e}") - return [] - - _video_gallery_cache["data"] = videos - _video_gallery_cache["timestamp"] = now - - return videos[:limit] if limit else videos - - -def upload_job_artifacts(directory, job_id): - """ - Upload all generated clips and metadata for a job to S3. 
- """ - bucket_name = os.environ.get('AWS_S3_BUCKET', 'my-clips-bucket') - - if not os.path.exists(directory): - return - - for filename in os.listdir(directory): - # Upload .mp4 clips and the metadata JSON - if (filename.endswith(".mp4") or filename.endswith(".json")) and not filename.startswith("temp_"): - file_path = os.path.join(directory, filename) - s3_key = f"{job_id}/{filename}" - upload_file_to_s3(file_path, bucket_name, s3_key) - - +"""Compat shim: re-exports openshorts.integrations.s3 at the original import path. + +This module moved to openshorts/integrations/s3.py as part of the restructure. +New code should import from `openshorts.integrations.s3` directly; this shim +keeps existing imports (e.g. `from s3_uploader import upload_job_artifacts`) +working while the restructure is in flight. +""" +from openshorts.integrations.s3 import * # noqa: F401,F403 +from openshorts.integrations.s3 import ( # noqa: F401 + upload_file_to_s3, + get_s3_client, + generate_presigned_url, + list_all_clips, + upload_actor_to_s3, + list_actor_gallery, + upload_video_to_gallery, + list_video_gallery, + upload_job_artifacts, +) From e68fa9858861f1716eba836facec832bca25ef6b Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse Date: Tue, 19 May 2026 15:46:45 -0400 Subject: [PATCH 04/43] chore(restructure): move translate -> openshorts/integrations/elevenlabs.py Phase 1 step 2: relocate the ElevenLabs dubbing client. Only app.py imports from translate. Adds a re-export shim at the old path. Tests stay 62/62 green. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- openshorts/integrations/elevenlabs.py | 235 ++++++++++++++++++++++++ translate.py | 251 ++------------------------ 2 files changed, 249 insertions(+), 237 deletions(-) create mode 100644 openshorts/integrations/elevenlabs.py diff --git a/openshorts/integrations/elevenlabs.py b/openshorts/integrations/elevenlabs.py new file mode 100644 index 00000000..e5f65c72 --- /dev/null +++ b/openshorts/integrations/elevenlabs.py @@ -0,0 +1,235 @@ +"""ElevenLabs Dubbing API client: AI voice translation across 30+ languages.""" + +import os +import time +import httpx +from typing import Optional + +ELEVENLABS_API_BASE = "https://api.elevenlabs.io/v1" + +# Supported target languages for dubbing +SUPPORTED_LANGUAGES = { + "en": "English", + "es": "Spanish", + "fr": "French", + "de": "German", + "it": "Italian", + "pt": "Portuguese", + "pl": "Polish", + "hi": "Hindi", + "ja": "Japanese", + "ko": "Korean", + "zh": "Chinese", + "ar": "Arabic", + "ru": "Russian", + "tr": "Turkish", + "nl": "Dutch", + "sv": "Swedish", + "id": "Indonesian", + "fil": "Filipino", + "ms": "Malay", + "vi": "Vietnamese", + "th": "Thai", + "uk": "Ukrainian", + "el": "Greek", + "cs": "Czech", + "fi": "Finnish", + "ro": "Romanian", + "da": "Danish", + "bg": "Bulgarian", + "hr": "Croatian", + "sk": "Slovak", + "ta": "Tamil", +} + + +def create_dubbing_project( + video_path: str, + target_language: str, + api_key: str, + source_language: Optional[str] = None, +) -> dict: + """ + Create a new dubbing project with ElevenLabs. 
+ + Args: + video_path: Path to the video file + target_language: Target language code (e.g., 'es', 'fr', 'de') + api_key: ElevenLabs API key + source_language: Source language code (auto-detected if None) + + Returns: + dict with dubbing_id and expected_duration_sec + """ + url = f"{ELEVENLABS_API_BASE}/dubbing" + + headers = { + "xi-api-key": api_key, + } + + # Prepare form data + data = { + "target_lang": target_language, + "mode": "automatic", + "num_speakers": "0", + "watermark": "false", + } + + if source_language: + data["source_lang"] = source_language + + # Open and send the video file + with open(video_path, "rb") as video_file: + files = { + "file": (os.path.basename(video_path), video_file, "video/mp4") + } + + print(f"[ElevenLabs] Creating dubbing project for {target_language}...") + with httpx.Client(timeout=300.0) as client: + response = client.post(url, headers=headers, data=data, files=files) + + if response.status_code not in [200, 201]: + error_msg = response.text + try: + error_data = response.json() + error_msg = error_data.get("detail", {}).get("message", response.text) + except: + pass + raise Exception(f"ElevenLabs API error: {error_msg}") + + result = response.json() + print(f"[ElevenLabs] Dubbing project created: {result.get('dubbing_id')}") + return result + + +def get_dubbing_status(dubbing_id: str, api_key: str) -> dict: + """ + Check the status of a dubbing project. + + Returns: + dict with status ('dubbing', 'dubbed', 'failed') and other metadata + """ + url = f"{ELEVENLABS_API_BASE}/dubbing/{dubbing_id}" + + headers = { + "xi-api-key": api_key, + } + + with httpx.Client(timeout=30.0) as client: + response = client.get(url, headers=headers) + + if response.status_code != 200: + raise Exception(f"Failed to get dubbing status: {response.text}") + + return response.json() + + +def download_dubbed_video( + dubbing_id: str, + target_language: str, + output_path: str, + api_key: str +) -> str: + """ + Download the dubbed video file. 
+ + Args: + dubbing_id: The dubbing project ID + target_language: Target language code + output_path: Where to save the dubbed video + api_key: ElevenLabs API key + + Returns: + Path to the downloaded file + """ + url = f"{ELEVENLABS_API_BASE}/dubbing/{dubbing_id}/audio/{target_language}" + + headers = { + "xi-api-key": api_key, + } + + print(f"[ElevenLabs] Downloading dubbed video...") + with httpx.Client(timeout=120.0) as client: + with client.stream("GET", url, headers=headers) as response: + if response.status_code != 200: + raise Exception(f"Failed to download dubbed video: {response.text}") + + with open(output_path, "wb") as f: + for chunk in response.iter_bytes(chunk_size=8192): + f.write(chunk) + + print(f"[ElevenLabs] Dubbed video saved to: {output_path}") + return output_path + + +def translate_video( + video_path: str, + output_path: str, + target_language: str, + api_key: str, + source_language: Optional[str] = None, + max_wait_seconds: int = 600, + poll_interval: int = 5, +) -> str: + """ + Translate a video to a target language using ElevenLabs dubbing. + + This is a blocking call that waits for the dubbing to complete. 
+ + Args: + video_path: Path to input video + output_path: Path to save translated video + target_language: Target language code + api_key: ElevenLabs API key + source_language: Source language code (auto-detected if None) + max_wait_seconds: Maximum time to wait for dubbing (default 10 min) + poll_interval: Seconds between status checks + + Returns: + Path to the translated video + """ + # Create dubbing project + project = create_dubbing_project( + video_path=video_path, + target_language=target_language, + api_key=api_key, + source_language=source_language, + ) + + dubbing_id = project["dubbing_id"] + expected_duration = project.get("expected_duration_sec", 60) + + print(f"[ElevenLabs] Dubbing ID: {dubbing_id}, Expected duration: {expected_duration}s") + + # Poll for completion + start_time = time.time() + while True: + elapsed = time.time() - start_time + if elapsed > max_wait_seconds: + raise Exception(f"Dubbing timed out after {max_wait_seconds} seconds") + + status = get_dubbing_status(dubbing_id, api_key) + current_status = status.get("status", "unknown") + + print(f"[ElevenLabs] Status: {current_status} (elapsed: {int(elapsed)}s)") + + if current_status == "dubbed": + # Download the result + return download_dubbed_video( + dubbing_id=dubbing_id, + target_language=target_language, + output_path=output_path, + api_key=api_key, + ) + + elif current_status == "failed": + error = status.get("error", "Unknown error") + raise Exception(f"Dubbing failed: {error}") + + # Still processing, wait and poll again + time.sleep(poll_interval) + + +def get_supported_languages() -> dict: + """Return dict of supported language codes and names.""" + return SUPPORTED_LANGUAGES.copy() diff --git a/translate.py b/translate.py index d8bc703f..1bb037b9 100644 --- a/translate.py +++ b/translate.py @@ -1,239 +1,16 @@ -""" -ElevenLabs Video Translation/Dubbing Module +"""Compat shim: re-exports openshorts.integrations.elevenlabs at the original path. 
-Uses ElevenLabs Dubbing API to translate video audio to different languages. +This module moved to openshorts/integrations/elevenlabs.py as part of the +restructure. New code should import from `openshorts.integrations.elevenlabs` +directly; this shim keeps existing `from translate import ...` calls working. """ - -import os -import time -import httpx -from typing import Optional - -ELEVENLABS_API_BASE = "https://api.elevenlabs.io/v1" - -# Supported target languages for dubbing -SUPPORTED_LANGUAGES = { - "en": "English", - "es": "Spanish", - "fr": "French", - "de": "German", - "it": "Italian", - "pt": "Portuguese", - "pl": "Polish", - "hi": "Hindi", - "ja": "Japanese", - "ko": "Korean", - "zh": "Chinese", - "ar": "Arabic", - "ru": "Russian", - "tr": "Turkish", - "nl": "Dutch", - "sv": "Swedish", - "id": "Indonesian", - "fil": "Filipino", - "ms": "Malay", - "vi": "Vietnamese", - "th": "Thai", - "uk": "Ukrainian", - "el": "Greek", - "cs": "Czech", - "fi": "Finnish", - "ro": "Romanian", - "da": "Danish", - "bg": "Bulgarian", - "hr": "Croatian", - "sk": "Slovak", - "ta": "Tamil", -} - - -def create_dubbing_project( - video_path: str, - target_language: str, - api_key: str, - source_language: Optional[str] = None, -) -> dict: - """ - Create a new dubbing project with ElevenLabs. 
- - Args: - video_path: Path to the video file - target_language: Target language code (e.g., 'es', 'fr', 'de') - api_key: ElevenLabs API key - source_language: Source language code (auto-detected if None) - - Returns: - dict with dubbing_id and expected_duration_sec - """ - url = f"{ELEVENLABS_API_BASE}/dubbing" - - headers = { - "xi-api-key": api_key, - } - - # Prepare form data - data = { - "target_lang": target_language, - "mode": "automatic", - "num_speakers": "0", - "watermark": "false", - } - - if source_language: - data["source_lang"] = source_language - - # Open and send the video file - with open(video_path, "rb") as video_file: - files = { - "file": (os.path.basename(video_path), video_file, "video/mp4") - } - - print(f"[ElevenLabs] Creating dubbing project for {target_language}...") - with httpx.Client(timeout=300.0) as client: - response = client.post(url, headers=headers, data=data, files=files) - - if response.status_code not in [200, 201]: - error_msg = response.text - try: - error_data = response.json() - error_msg = error_data.get("detail", {}).get("message", response.text) - except: - pass - raise Exception(f"ElevenLabs API error: {error_msg}") - - result = response.json() - print(f"[ElevenLabs] Dubbing project created: {result.get('dubbing_id')}") - return result - - -def get_dubbing_status(dubbing_id: str, api_key: str) -> dict: - """ - Check the status of a dubbing project. - - Returns: - dict with status ('dubbing', 'dubbed', 'failed') and other metadata - """ - url = f"{ELEVENLABS_API_BASE}/dubbing/{dubbing_id}" - - headers = { - "xi-api-key": api_key, - } - - with httpx.Client(timeout=30.0) as client: - response = client.get(url, headers=headers) - - if response.status_code != 200: - raise Exception(f"Failed to get dubbing status: {response.text}") - - return response.json() - - -def download_dubbed_video( - dubbing_id: str, - target_language: str, - output_path: str, - api_key: str -) -> str: - """ - Download the dubbed video file. 
- - Args: - dubbing_id: The dubbing project ID - target_language: Target language code - output_path: Where to save the dubbed video - api_key: ElevenLabs API key - - Returns: - Path to the downloaded file - """ - url = f"{ELEVENLABS_API_BASE}/dubbing/{dubbing_id}/audio/{target_language}" - - headers = { - "xi-api-key": api_key, - } - - print(f"[ElevenLabs] Downloading dubbed video...") - with httpx.Client(timeout=120.0) as client: - with client.stream("GET", url, headers=headers) as response: - if response.status_code != 200: - raise Exception(f"Failed to download dubbed video: {response.text}") - - with open(output_path, "wb") as f: - for chunk in response.iter_bytes(chunk_size=8192): - f.write(chunk) - - print(f"[ElevenLabs] Dubbed video saved to: {output_path}") - return output_path - - -def translate_video( - video_path: str, - output_path: str, - target_language: str, - api_key: str, - source_language: Optional[str] = None, - max_wait_seconds: int = 600, - poll_interval: int = 5, -) -> str: - """ - Translate a video to a target language using ElevenLabs dubbing. - - This is a blocking call that waits for the dubbing to complete. 
- - Args: - video_path: Path to input video - output_path: Path to save translated video - target_language: Target language code - api_key: ElevenLabs API key - source_language: Source language code (auto-detected if None) - max_wait_seconds: Maximum time to wait for dubbing (default 10 min) - poll_interval: Seconds between status checks - - Returns: - Path to the translated video - """ - # Create dubbing project - project = create_dubbing_project( - video_path=video_path, - target_language=target_language, - api_key=api_key, - source_language=source_language, - ) - - dubbing_id = project["dubbing_id"] - expected_duration = project.get("expected_duration_sec", 60) - - print(f"[ElevenLabs] Dubbing ID: {dubbing_id}, Expected duration: {expected_duration}s") - - # Poll for completion - start_time = time.time() - while True: - elapsed = time.time() - start_time - if elapsed > max_wait_seconds: - raise Exception(f"Dubbing timed out after {max_wait_seconds} seconds") - - status = get_dubbing_status(dubbing_id, api_key) - current_status = status.get("status", "unknown") - - print(f"[ElevenLabs] Status: {current_status} (elapsed: {int(elapsed)}s)") - - if current_status == "dubbed": - # Download the result - return download_dubbed_video( - dubbing_id=dubbing_id, - target_language=target_language, - output_path=output_path, - api_key=api_key, - ) - - elif current_status == "failed": - error = status.get("error", "Unknown error") - raise Exception(f"Dubbing failed: {error}") - - # Still processing, wait and poll again - time.sleep(poll_interval) - - -def get_supported_languages() -> dict: - """Return dict of supported language codes and names.""" - return SUPPORTED_LANGUAGES.copy() +from openshorts.integrations.elevenlabs import * # noqa: F401,F403 +from openshorts.integrations.elevenlabs import ( # noqa: F401 + SUPPORTED_LANGUAGES, + ELEVENLABS_API_BASE, + create_dubbing_project, + get_dubbing_status, + download_dubbed_video, + translate_video, + get_supported_languages, +) 
From 03375d4c9a13cafdd530d100f750e17683bd6cef Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse Date: Tue, 19 May 2026 15:47:45 -0400 Subject: [PATCH 05/43] chore(restructure): move hooks -> openshorts/overlays/hooks.py Phase 1 step 3: relocate the PIL hook-card generator + FFmpeg overlay helper. Imported by app.py and the three root verify_*.py scripts. Adds a re-export shim at the old path. Tests stay 62/62 green. Co-Authored-By: Claude Opus 4.7 (1M context) --- hooks.py | 256 ++--------------------------------- openshorts/overlays/hooks.py | 242 +++++++++++++++++++++++++++++++++ 2 files changed, 257 insertions(+), 241 deletions(-) create mode 100644 openshorts/overlays/hooks.py diff --git a/hooks.py b/hooks.py index 36490085..b096f4fd 100644 --- a/hooks.py +++ b/hooks.py @@ -1,241 +1,15 @@ -import os -import textwrap -import subprocess -import urllib.request -from PIL import Image, ImageDraw, ImageFont, ImageFilter - -FONT_URL = "https://github.com/googlefonts/noto-fonts/raw/main/hinted/ttf/NotoSerif/NotoSerif-Bold.ttf" -FONT_DIR = "fonts" -FONT_PATH = os.path.join(FONT_DIR, "NotoSerif-Bold.ttf") - -def download_font_if_needed(): - """Downloads a serif font for the hook text if not present.""" - if not os.path.exists(FONT_DIR): - os.makedirs(FONT_DIR) - if not os.path.exists(FONT_PATH): - print(f"⬇️ Downloading font from {FONT_URL}...") - try: - # Add user agent to avoid 403s slightly - req = urllib.request.Request( - FONT_URL, - headers={'User-Agent': 'Mozilla/5.0'} - ) - with urllib.request.urlopen(req) as response, open(FONT_PATH, 'wb') as out_file: - out_file.write(response.read()) - print("✅ Font downloaded.") - except Exception as e: - print(f"❌ Failed to download font: {e}") - -def create_hook_image(text, target_width, output_image_path="hook_overlay.png", font_scale=1.0): - """ - Generates a white box with black serif text using pixel-based wrapping. - target_width: The max width the box should occupy (e.g. 
85% of video) - """ - download_font_if_needed() - - # Configuration - padding_x = 30 # Balanced padding - padding_y = 25 - line_spacing = 20 # Increased spacing - cornerradius = 20 - shadow_offset = (5, 5) - shadow_blur = 10 - - # Font Size Calculation (approx 5% of width - tuned to match Noto Serif Bold metrics in browser) - base_font_size = int(target_width * 0.05) - font_size = int(base_font_size * font_scale) - - try: - font = ImageFont.truetype(FONT_PATH, font_size) - except Exception as e: - print(f"⚠️ Warning: Could not load font {FONT_PATH}, using default. Error: {e}") - font = ImageFont.load_default() - - # Wrap text logic (Pixel-based) - dummy_img = Image.new('RGBA', (1, 1)) - draw = ImageDraw.Draw(dummy_img) - - max_text_width = target_width - (2 * padding_x) - - # Handle manual newlines first - paragraphs = text.split('\n') - lines = [] - - for p in paragraphs: - if not p.strip(): - lines.append("") - continue - - words = p.split() - current_line = [] - - for word in words: - # Test if adding word fits - test_line = ' '.join(current_line + [word]) - bbox = draw.textbbox((0, 0), test_line, font=font) - w = bbox[2] - bbox[0] - - if w <= max_text_width: - current_line.append(word) - else: - # Line full, push current_line and start new - if current_line: - lines.append(' '.join(current_line)) - current_line = [word] - else: - # Single word too long? Force it. 
- lines.append(word) - current_line = [] - - if current_line: - lines.append(' '.join(current_line)) - - # Recalculate true width/height - max_line_width = 0 - text_heights = [] - - for line in lines: - if not line: - text_heights.append(font_size) # Use font size for empty line height - continue - - bbox = draw.textbbox((0, 0), line, font=font) - w = bbox[2] - bbox[0] - h = bbox[3] - bbox[1] - max_line_width = max(max_line_width, w) - text_heights.append(h) - - # Box dimensions - # We want the box to fit the text exactly + padding - # Ensure min width for aesthetic reasons if text is short (at least 30% of target) - box_width = max(max_line_width + (2 * padding_x), int(target_width * 0.3)) - - # Total Text Height: sum(heights) + spacing * (n-1) - if not text_heights: - total_text_height = font_size - else: - total_text_height = sum(text_heights) + (len(text_heights) - 1) * line_spacing - - box_height = total_text_height + (2 * padding_y) - - # Create Final Image with Rounded Corners and Shadow - # 1. Canvas for Shadow (larger than box) - canvas_w = box_width + 40 - canvas_h = box_height + 40 - - img = Image.new('RGBA', (canvas_w, canvas_h), (0, 0, 0, 0)) - draw = ImageDraw.Draw(img) - - # 2. Draw Shadow - shadow_box = [ - (20 + shadow_offset[0], 20 + shadow_offset[1]), - (20 + box_width + shadow_offset[0], 20 + box_height + shadow_offset[1]) - ] - draw.rounded_rectangle(shadow_box, radius=cornerradius, fill=(0, 0, 0, 100)) - - # 3. Blur Shadow - img = img.filter(ImageFilter.GaussianBlur(5)) - - # 4. Draw White Box (sharper, on top of blurred shadow) - draw_final = ImageDraw.Draw(img) - - main_box = [ - (20, 20), - (20 + box_width, 20 + box_height) - ] - # Semi-transparent white (240/255 alpha ~ 94% opacity) - draw_final.rounded_rectangle(main_box, radius=cornerradius, fill=(255, 255, 255, 240)) - - # 5. 
Draw Text - current_y = 20 + padding_y - 2 # Minor visual adjustment - for i, line in enumerate(lines): - if not line: - current_y += font_size + line_spacing - continue - - bbox = draw_final.textbbox((0, 0), line, font=font) - line_w = bbox[2] - bbox[0] - line_h = text_heights[i] if i < len(text_heights) else bbox[3] - bbox[1] - - # Center X - x = 20 + (box_width - line_w) // 2 - - # Draw Black Text - draw_final.text((x, current_y), line, font=font, fill="black") - - current_y += line_h + line_spacing - - img.save(output_image_path) - return output_image_path, canvas_w, canvas_h - -def add_hook_to_video(video_path, text, output_path, position="top", font_scale=1.0): - """ - Overlays text hook onto video. - position: 'top', 'center', 'bottom' - font_scale: float multiplier (1.0 = default) - """ - if not os.path.exists(video_path): - raise FileNotFoundError(f"Video {video_path} not found") - - # 1. Probe video width to scale text properly - try: - cmd = ['ffprobe', '-v', 'error', '-show_entries', 'stream=width,height', '-of', 'csv=s=x:p=0', video_path] - res = subprocess.check_output(cmd).decode().strip() - # Takes first stream if multiple - dims = res.split('\n')[0].split('x') - video_width = int(dims[0]) - video_height = int(dims[1]) - except Exception as e: - print(f"⚠️ FFprobe failed: {e}. Assuming 1080x1920") - video_width = 1080 - video_height = 1920 - - # 2. Generate Image - # Box check: Don't let it be wider than 90% of screen - target_box_width = int(video_width * 0.9) - - hook_filename = f"temp_hook_{os.path.basename(video_path)}.png" - # Ensure unique or temp location if needed, but relative is fine for this app structure - - try: - img_path, box_w, box_h = create_hook_image(text, target_box_width, hook_filename, font_scale=font_scale) - - # 3. 
Calculate Overlay Position - overlay_x = (video_width - box_w) // 2 - - if position == "center": - overlay_y = (video_height - box_h) // 2 - elif position == "bottom": - # Bottom 20% mark (approx) - overlay_y = int(video_height * 0.70) - else: - # Top 20% mark - overlay_y = int(video_height * 0.20) - - # 4. FFmpeg Command - print(f"🎬 Overlaying hook: '{text}' at {overlay_x},{overlay_y}") - - ffmpeg_cmd = [ - 'ffmpeg', '-y', - '-i', video_path, - '-i', img_path, - '-filter_complex', f"[0:v][1:v]overlay={overlay_x}:{overlay_y}", - '-c:a', 'copy', - '-c:v', 'libx264', '-preset', 'fast', '-crf', '22', - output_path - ] - - subprocess.run(ffmpeg_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - print(f"✅ Hook added to {output_path}") - return True - - except subprocess.CalledProcessError as e: - print(f"❌ FFmpeg Error: {e.stderr.decode() if e.stderr else 'Unknown'}") - raise e - except Exception as e: - print(f"❌ Hook Gen Error: {e}") - raise e - finally: - # Cleanup temp image - if os.path.exists(hook_filename): - os.remove(hook_filename) +"""Compat shim: re-exports openshorts.overlays.hooks at the original import path. + +This module moved to openshorts/overlays/hooks.py as part of the restructure. +New code should import from `openshorts.overlays.hooks` directly; this shim +keeps existing `from hooks import ...` calls working. 
+""" +from openshorts.overlays.hooks import * # noqa: F401,F403 +from openshorts.overlays.hooks import ( # noqa: F401 + FONT_URL, + FONT_DIR, + FONT_PATH, + download_font_if_needed, + create_hook_image, + add_hook_to_video, +) diff --git a/openshorts/overlays/hooks.py b/openshorts/overlays/hooks.py new file mode 100644 index 00000000..e0bb217f --- /dev/null +++ b/openshorts/overlays/hooks.py @@ -0,0 +1,242 @@ +"""Hook text overlays: PIL-rendered cards (PNG) burned onto video via FFmpeg.""" +import os +import textwrap +import subprocess +import urllib.request +from PIL import Image, ImageDraw, ImageFont, ImageFilter + +FONT_URL = "https://github.com/googlefonts/noto-fonts/raw/main/hinted/ttf/NotoSerif/NotoSerif-Bold.ttf" +FONT_DIR = "fonts" +FONT_PATH = os.path.join(FONT_DIR, "NotoSerif-Bold.ttf") + +def download_font_if_needed(): + """Downloads a serif font for the hook text if not present.""" + if not os.path.exists(FONT_DIR): + os.makedirs(FONT_DIR) + if not os.path.exists(FONT_PATH): + print(f"⬇️ Downloading font from {FONT_URL}...") + try: + # Add user agent to avoid 403s slightly + req = urllib.request.Request( + FONT_URL, + headers={'User-Agent': 'Mozilla/5.0'} + ) + with urllib.request.urlopen(req) as response, open(FONT_PATH, 'wb') as out_file: + out_file.write(response.read()) + print("✅ Font downloaded.") + except Exception as e: + print(f"❌ Failed to download font: {e}") + +def create_hook_image(text, target_width, output_image_path="hook_overlay.png", font_scale=1.0): + """ + Generates a white box with black serif text using pixel-based wrapping. + target_width: The max width the box should occupy (e.g. 
85% of video) + """ + download_font_if_needed() + + # Configuration + padding_x = 30 # Balanced padding + padding_y = 25 + line_spacing = 20 # Increased spacing + cornerradius = 20 + shadow_offset = (5, 5) + shadow_blur = 10 + + # Font Size Calculation (approx 5% of width - tuned to match Noto Serif Bold metrics in browser) + base_font_size = int(target_width * 0.05) + font_size = int(base_font_size * font_scale) + + try: + font = ImageFont.truetype(FONT_PATH, font_size) + except Exception as e: + print(f"⚠️ Warning: Could not load font {FONT_PATH}, using default. Error: {e}") + font = ImageFont.load_default() + + # Wrap text logic (Pixel-based) + dummy_img = Image.new('RGBA', (1, 1)) + draw = ImageDraw.Draw(dummy_img) + + max_text_width = target_width - (2 * padding_x) + + # Handle manual newlines first + paragraphs = text.split('\n') + lines = [] + + for p in paragraphs: + if not p.strip(): + lines.append("") + continue + + words = p.split() + current_line = [] + + for word in words: + # Test if adding word fits + test_line = ' '.join(current_line + [word]) + bbox = draw.textbbox((0, 0), test_line, font=font) + w = bbox[2] - bbox[0] + + if w <= max_text_width: + current_line.append(word) + else: + # Line full, push current_line and start new + if current_line: + lines.append(' '.join(current_line)) + current_line = [word] + else: + # Single word too long? Force it. 
+ lines.append(word) + current_line = [] + + if current_line: + lines.append(' '.join(current_line)) + + # Recalculate true width/height + max_line_width = 0 + text_heights = [] + + for line in lines: + if not line: + text_heights.append(font_size) # Use font size for empty line height + continue + + bbox = draw.textbbox((0, 0), line, font=font) + w = bbox[2] - bbox[0] + h = bbox[3] - bbox[1] + max_line_width = max(max_line_width, w) + text_heights.append(h) + + # Box dimensions + # We want the box to fit the text exactly + padding + # Ensure min width for aesthetic reasons if text is short (at least 30% of target) + box_width = max(max_line_width + (2 * padding_x), int(target_width * 0.3)) + + # Total Text Height: sum(heights) + spacing * (n-1) + if not text_heights: + total_text_height = font_size + else: + total_text_height = sum(text_heights) + (len(text_heights) - 1) * line_spacing + + box_height = total_text_height + (2 * padding_y) + + # Create Final Image with Rounded Corners and Shadow + # 1. Canvas for Shadow (larger than box) + canvas_w = box_width + 40 + canvas_h = box_height + 40 + + img = Image.new('RGBA', (canvas_w, canvas_h), (0, 0, 0, 0)) + draw = ImageDraw.Draw(img) + + # 2. Draw Shadow + shadow_box = [ + (20 + shadow_offset[0], 20 + shadow_offset[1]), + (20 + box_width + shadow_offset[0], 20 + box_height + shadow_offset[1]) + ] + draw.rounded_rectangle(shadow_box, radius=cornerradius, fill=(0, 0, 0, 100)) + + # 3. Blur Shadow + img = img.filter(ImageFilter.GaussianBlur(5)) + + # 4. Draw White Box (sharper, on top of blurred shadow) + draw_final = ImageDraw.Draw(img) + + main_box = [ + (20, 20), + (20 + box_width, 20 + box_height) + ] + # Semi-transparent white (240/255 alpha ~ 94% opacity) + draw_final.rounded_rectangle(main_box, radius=cornerradius, fill=(255, 255, 255, 240)) + + # 5. 
Draw Text + current_y = 20 + padding_y - 2 # Minor visual adjustment + for i, line in enumerate(lines): + if not line: + current_y += font_size + line_spacing + continue + + bbox = draw_final.textbbox((0, 0), line, font=font) + line_w = bbox[2] - bbox[0] + line_h = text_heights[i] if i < len(text_heights) else bbox[3] - bbox[1] + + # Center X + x = 20 + (box_width - line_w) // 2 + + # Draw Black Text + draw_final.text((x, current_y), line, font=font, fill="black") + + current_y += line_h + line_spacing + + img.save(output_image_path) + return output_image_path, canvas_w, canvas_h + +def add_hook_to_video(video_path, text, output_path, position="top", font_scale=1.0): + """ + Overlays text hook onto video. + position: 'top', 'center', 'bottom' + font_scale: float multiplier (1.0 = default) + """ + if not os.path.exists(video_path): + raise FileNotFoundError(f"Video {video_path} not found") + + # 1. Probe video width to scale text properly + try: + cmd = ['ffprobe', '-v', 'error', '-show_entries', 'stream=width,height', '-of', 'csv=s=x:p=0', video_path] + res = subprocess.check_output(cmd).decode().strip() + # Takes first stream if multiple + dims = res.split('\n')[0].split('x') + video_width = int(dims[0]) + video_height = int(dims[1]) + except Exception as e: + print(f"⚠️ FFprobe failed: {e}. Assuming 1080x1920") + video_width = 1080 + video_height = 1920 + + # 2. Generate Image + # Box check: Don't let it be wider than 90% of screen + target_box_width = int(video_width * 0.9) + + hook_filename = f"temp_hook_{os.path.basename(video_path)}.png" + # Ensure unique or temp location if needed, but relative is fine for this app structure + + try: + img_path, box_w, box_h = create_hook_image(text, target_box_width, hook_filename, font_scale=font_scale) + + # 3. 
Calculate Overlay Position + overlay_x = (video_width - box_w) // 2 + + if position == "center": + overlay_y = (video_height - box_h) // 2 + elif position == "bottom": + # Bottom 20% mark (approx) + overlay_y = int(video_height * 0.70) + else: + # Top 20% mark + overlay_y = int(video_height * 0.20) + + # 4. FFmpeg Command + print(f"🎬 Overlaying hook: '{text}' at {overlay_x},{overlay_y}") + + ffmpeg_cmd = [ + 'ffmpeg', '-y', + '-i', video_path, + '-i', img_path, + '-filter_complex', f"[0:v][1:v]overlay={overlay_x}:{overlay_y}", + '-c:a', 'copy', + '-c:v', 'libx264', '-preset', 'fast', '-crf', '22', + output_path + ] + + subprocess.run(ffmpeg_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + print(f"✅ Hook added to {output_path}") + return True + + except subprocess.CalledProcessError as e: + print(f"❌ FFmpeg Error: {e.stderr.decode() if e.stderr else 'Unknown'}") + raise e + except Exception as e: + print(f"❌ Hook Gen Error: {e}") + raise e + finally: + # Cleanup temp image + if os.path.exists(hook_filename): + os.remove(hook_filename) From 285b30fb59b5562ed4bb048866b7f37b08a7e0c9 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse Date: Tue, 19 May 2026 15:48:40 -0400 Subject: [PATCH 06/43] chore(restructure): split subtitles -> overlays/subtitles_{generate,render}.py Phase 1 step 4: separate concerns. Generation (faster-whisper + SRT writing) lives in subtitles_generate.py; FFmpeg burn-in + ASS color conversion lives in subtitles_render.py. Shim at the old path preserves existing imports. Tests stay 62/62 green. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- openshorts/overlays/subtitles_generate.py | 137 ++++++++++++ openshorts/overlays/subtitles_render.py | 93 +++++++++ subtitles.py | 243 ++-------------------- 3 files changed, 249 insertions(+), 224 deletions(-) create mode 100644 openshorts/overlays/subtitles_generate.py create mode 100644 openshorts/overlays/subtitles_render.py diff --git a/openshorts/overlays/subtitles_generate.py b/openshorts/overlays/subtitles_generate.py new file mode 100644 index 00000000..a4601de7 --- /dev/null +++ b/openshorts/overlays/subtitles_generate.py @@ -0,0 +1,137 @@ +"""SRT subtitle generation: transcription and word-level grouping into short lines.""" + +import os +import subprocess + + +def transcribe_audio(video_path): + """ + Transcribe audio from a video file using faster-whisper. + Returns transcript in the same format as main.py for compatibility. + """ + from faster_whisper import WhisperModel + + print(f"🎙️ Transcribing audio from: {video_path}") + + # Run on CPU with INT8 quantization for speed + model = WhisperModel("base", device="cpu", compute_type="int8") + + segments, info = model.transcribe(video_path, word_timestamps=True) + + transcript = { + "segments": [], + "language": info.language + } + + for segment in segments: + seg_data = { + "start": segment.start, + "end": segment.end, + "text": segment.text, + "words": [] + } + if segment.words: + for word in segment.words: + seg_data["words"].append({ + "word": word.word.strip(), + "start": word.start, + "end": word.end + }) + transcript["segments"].append(seg_data) + + print(f"✅ Transcription complete. Language: {info.language}") + return transcript + + +def generate_srt_from_video(video_path, output_path, max_chars=20, max_duration=2.0): + """ + Transcribe a video and generate SRT directly. + Used for dubbed videos that don't have a pre-existing transcript. 
+ """ + transcript = transcribe_audio(video_path) + + # Get video duration to use as clip_end + import cv2 + cap = cv2.VideoCapture(video_path) + fps = cap.get(cv2.CAP_PROP_FPS) + frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + duration = frame_count / fps if fps else 0 + cap.release() + + return generate_srt(transcript, 0, duration, output_path, max_chars, max_duration) + + +def generate_srt(transcript, clip_start, clip_end, output_path, max_chars=20, max_duration=2.0): + """ + Generates an SRT file from the transcript for a specific time range. + Groups words into short lines suitable for vertical video. + """ + + words = [] + # 1. Extract and flatten words within range + for segment in transcript.get('segments', []): + for word_info in segment.get('words', []): + # Check overlap + if word_info['end'] > clip_start and word_info['start'] < clip_end: + words.append(word_info) + + if not words: + return False + + srt_content = "" + index = 1 + + current_block = [] + block_start = None + + for i, word in enumerate(words): + # Adjust times relative to clip + start = max(0, word['start'] - clip_start) + end = max(0, word['end'] - clip_start) + + # Clip to video duration logic handled by ffmpeg usually, but good to be safe + + if not current_block: + current_block.append(word) + block_start = start + else: + # Decide whether to close block + current_text_len = sum(len(w['word']) + 1 for w in current_block) + duration = end - block_start + + if current_text_len + len(word['word']) > max_chars or duration > max_duration: + # Finalize current block + # End time of block is start of this word (gap) or end of last word? + # Usually end of last word. 
+ block_end = current_block[-1]['end'] - clip_start + + text = " ".join([w['word'] for w in current_block]).strip() + srt_content += format_srt_block(index, block_start, block_end, text) + index += 1 + + current_block = [word] + block_start = start + else: + current_block.append(word) + + # Final block + if current_block: + block_end = current_block[-1]['end'] - clip_start + text = " ".join([w['word'] for w in current_block]).strip() + srt_content += format_srt_block(index, block_start, block_end, text) + + with open(output_path, 'w', encoding='utf-8') as f: + f.write(srt_content) + + return True + + +def format_srt_block(index, start, end, text): + def format_time(seconds): + hours = int(seconds // 3600) + minutes = int((seconds % 3600) // 60) + secs = int(seconds % 60) + millis = int((seconds - int(seconds)) * 1000) + return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}" + + return f"{index}\n{format_time(start)} --> {format_time(end)}\n{text}\n\n" diff --git a/openshorts/overlays/subtitles_render.py b/openshorts/overlays/subtitles_render.py new file mode 100644 index 00000000..b034cb17 --- /dev/null +++ b/openshorts/overlays/subtitles_render.py @@ -0,0 +1,93 @@ +"""Subtitle burn-in: FFmpeg subtitles filter + ASS color/style conversion.""" + +import subprocess + + +def hex_to_ass_color(hex_color, opacity=1.0): + """Convert #RRGGBB to ASS &HAABBGGRR format. opacity: 0.0=transparent, 1.0=opaque""" + hex_color = hex_color.lstrip('#') + if len(hex_color) != 6: + hex_color = "FFFFFF" + r = int(hex_color[0:2], 16) + g = int(hex_color[2:4], 16) + b = int(hex_color[4:6], 16) + alpha = round((1.0 - opacity) * 255) + return f"&H{alpha:02X}{b:02X}{g:02X}{r:02X}" + + +def burn_subtitles(video_path, srt_path, output_path, alignment=2, fontsize=16, + font_name="Verdana", font_color="#FFFFFF", + border_color="#000000", border_width=2, + bg_color="#000000", bg_opacity=0.0): + """ + Burns subtitles into the video using FFmpeg. 
+ Supports two modes: + - Outline mode (bg_opacity=0): Text with colored outline/border + - Box mode (bg_opacity>0): Text with semi-transparent background box + """ + # Position mapping + ass_alignment = 2 + align_lower = str(alignment).lower() + if align_lower == 'top': + ass_alignment = 6 + elif align_lower == 'middle': + ass_alignment = 10 + elif align_lower == 'bottom': + ass_alignment = 2 + + # Font size scaling for ASS virtual resolution (PlayResY=288 default) + # For vertical 1080x1920 video, we need larger text for readability + final_fontsize = int(fontsize * 0.85) + if final_fontsize < 10: + final_fontsize = 10 + + # Path handling for FFmpeg filter syntax + safe_srt_path = srt_path.replace('\\', '/').replace(':', '\\:') + + # Convert colors to ASS format and build style + primary_colour = hex_to_ass_color(font_color, 1.0) + + if bg_opacity > 0: + # Box mode: opaque background box + border_style = 3 + outline_colour = hex_to_ass_color(bg_color, bg_opacity) + outline_width = 1 + else: + # Outline mode: text border/outline + border_style = 1 + outline_colour = hex_to_ass_color(border_color, 1.0) + outline_width = max(1, border_width) + + back_colour = hex_to_ass_color("#000000", 0.0) + + style_string = ( + f"Alignment={ass_alignment}," + f"Fontname={font_name}," + f"Fontsize={final_fontsize}," + f"PrimaryColour={primary_colour}," + f"OutlineColour={outline_colour}," + f"BackColour={back_colour}," + f"BorderStyle={border_style}," + f"Outline={outline_width}," + f"Shadow=0," + f"MarginV=25," + f"Bold=1" + ) + + cmd = [ + 'ffmpeg', '-y', + '-i', video_path, + '-vf', f"subtitles='{safe_srt_path}':force_style='{style_string}'", + '-c:a', 'copy', + '-c:v', 'libx264', '-preset', 'fast', '-crf', '23', + output_path + ] + + print(f"🎬 Burning subtitles: {' '.join(cmd)}") + result = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) + + if result.returncode != 0: + print(f"❌ FFmpeg Subtitle Error: {result.stderr.decode()}") + raise 
Exception(f"FFmpeg failed: {result.stderr.decode()}") + + return True diff --git a/subtitles.py b/subtitles.py index f961a0a4..03c8f6fc 100644 --- a/subtitles.py +++ b/subtitles.py @@ -1,224 +1,19 @@ -import os -import subprocess - - -def transcribe_audio(video_path): - """ - Transcribe audio from a video file using faster-whisper. - Returns transcript in the same format as main.py for compatibility. - """ - from faster_whisper import WhisperModel - - print(f"🎙️ Transcribing audio from: {video_path}") - - # Run on CPU with INT8 quantization for speed - model = WhisperModel("base", device="cpu", compute_type="int8") - - segments, info = model.transcribe(video_path, word_timestamps=True) - - transcript = { - "segments": [], - "language": info.language - } - - for segment in segments: - seg_data = { - "start": segment.start, - "end": segment.end, - "text": segment.text, - "words": [] - } - if segment.words: - for word in segment.words: - seg_data["words"].append({ - "word": word.word.strip(), - "start": word.start, - "end": word.end - }) - transcript["segments"].append(seg_data) - - print(f"✅ Transcription complete. Language: {info.language}") - return transcript - - -def generate_srt_from_video(video_path, output_path, max_chars=20, max_duration=2.0): - """ - Transcribe a video and generate SRT directly. - Used for dubbed videos that don't have a pre-existing transcript. - """ - transcript = transcribe_audio(video_path) - - # Get video duration to use as clip_end - import cv2 - cap = cv2.VideoCapture(video_path) - fps = cap.get(cv2.CAP_PROP_FPS) - frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - duration = frame_count / fps if fps else 0 - cap.release() - - return generate_srt(transcript, 0, duration, output_path, max_chars, max_duration) - - -def generate_srt(transcript, clip_start, clip_end, output_path, max_chars=20, max_duration=2.0): - """ - Generates an SRT file from the transcript for a specific time range. 
- Groups words into short lines suitable for vertical video. - """ - - words = [] - # 1. Extract and flatten words within range - for segment in transcript.get('segments', []): - for word_info in segment.get('words', []): - # Check overlap - if word_info['end'] > clip_start and word_info['start'] < clip_end: - words.append(word_info) - - if not words: - return False - - srt_content = "" - index = 1 - - current_block = [] - block_start = None - - for i, word in enumerate(words): - # Adjust times relative to clip - start = max(0, word['start'] - clip_start) - end = max(0, word['end'] - clip_start) - - # Clip to video duration logic handled by ffmpeg usually, but good to be safe - - if not current_block: - current_block.append(word) - block_start = start - else: - # Decide whether to close block - current_text_len = sum(len(w['word']) + 1 for w in current_block) - duration = end - block_start - - if current_text_len + len(word['word']) > max_chars or duration > max_duration: - # Finalize current block - # End time of block is start of this word (gap) or end of last word? - # Usually end of last word. 
- block_end = current_block[-1]['end'] - clip_start - - text = " ".join([w['word'] for w in current_block]).strip() - srt_content += format_srt_block(index, block_start, block_end, text) - index += 1 - - current_block = [word] - block_start = start - else: - current_block.append(word) - - # Final block - if current_block: - block_end = current_block[-1]['end'] - clip_start - text = " ".join([w['word'] for w in current_block]).strip() - srt_content += format_srt_block(index, block_start, block_end, text) - - with open(output_path, 'w', encoding='utf-8') as f: - f.write(srt_content) - - return True - -def format_srt_block(index, start, end, text): - def format_time(seconds): - hours = int(seconds // 3600) - minutes = int((seconds % 3600) // 60) - secs = int(seconds % 60) - millis = int((seconds - int(seconds)) * 1000) - return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}" - - return f"{index}\n{format_time(start)} --> {format_time(end)}\n{text}\n\n" - -def hex_to_ass_color(hex_color, opacity=1.0): - """Convert #RRGGBB to ASS &HAABBGGRR format. opacity: 0.0=transparent, 1.0=opaque""" - hex_color = hex_color.lstrip('#') - if len(hex_color) != 6: - hex_color = "FFFFFF" - r = int(hex_color[0:2], 16) - g = int(hex_color[2:4], 16) - b = int(hex_color[4:6], 16) - alpha = round((1.0 - opacity) * 255) - return f"&H{alpha:02X}{b:02X}{g:02X}{r:02X}" - - -def burn_subtitles(video_path, srt_path, output_path, alignment=2, fontsize=16, - font_name="Verdana", font_color="#FFFFFF", - border_color="#000000", border_width=2, - bg_color="#000000", bg_opacity=0.0): - """ - Burns subtitles into the video using FFmpeg. 
- Supports two modes: - - Outline mode (bg_opacity=0): Text with colored outline/border - - Box mode (bg_opacity>0): Text with semi-transparent background box - """ - # Position mapping - ass_alignment = 2 - align_lower = str(alignment).lower() - if align_lower == 'top': - ass_alignment = 6 - elif align_lower == 'middle': - ass_alignment = 10 - elif align_lower == 'bottom': - ass_alignment = 2 - - # Font size scaling for ASS virtual resolution (PlayResY=288 default) - # For vertical 1080x1920 video, we need larger text for readability - final_fontsize = int(fontsize * 0.85) - if final_fontsize < 10: - final_fontsize = 10 - - # Path handling for FFmpeg filter syntax - safe_srt_path = srt_path.replace('\\', '/').replace(':', '\\:') - - # Convert colors to ASS format and build style - primary_colour = hex_to_ass_color(font_color, 1.0) - - if bg_opacity > 0: - # Box mode: opaque background box - border_style = 3 - outline_colour = hex_to_ass_color(bg_color, bg_opacity) - outline_width = 1 - else: - # Outline mode: text border/outline - border_style = 1 - outline_colour = hex_to_ass_color(border_color, 1.0) - outline_width = max(1, border_width) - - back_colour = hex_to_ass_color("#000000", 0.0) - - style_string = ( - f"Alignment={ass_alignment}," - f"Fontname={font_name}," - f"Fontsize={final_fontsize}," - f"PrimaryColour={primary_colour}," - f"OutlineColour={outline_colour}," - f"BackColour={back_colour}," - f"BorderStyle={border_style}," - f"Outline={outline_width}," - f"Shadow=0," - f"MarginV=25," - f"Bold=1" - ) - - cmd = [ - 'ffmpeg', '-y', - '-i', video_path, - '-vf', f"subtitles='{safe_srt_path}':force_style='{style_string}'", - '-c:a', 'copy', - '-c:v', 'libx264', '-preset', 'fast', '-crf', '23', - output_path - ] - - print(f"🎬 Burning subtitles: {' '.join(cmd)}") - result = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) - - if result.returncode != 0: - print(f"❌ FFmpeg Subtitle Error: {result.stderr.decode()}") - raise 
Exception(f"FFmpeg failed: {result.stderr.decode()}") - - return True - +"""Compat shim: re-exports openshorts.overlays.subtitles_* at the original path. + +This module was split into two files as part of the restructure: +- openshorts/overlays/subtitles_generate.py (transcribe + SRT writing) +- openshorts/overlays/subtitles_render.py (FFmpeg subtitles burn-in) + +New code should import from those modules directly. This shim keeps existing +`from subtitles import ...` calls working. +""" +from openshorts.overlays.subtitles_generate import ( # noqa: F401 + transcribe_audio, + generate_srt_from_video, + generate_srt, + format_srt_block, +) +from openshorts.overlays.subtitles_render import ( # noqa: F401 + hex_to_ass_color, + burn_subtitles, +) From 830dff4f5ad118edbb9f2029cb62ef998dd489b9 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse Date: Tue, 19 May 2026 15:51:07 -0400 Subject: [PATCH 07/43] chore(restructure): split editor -> editing/ai_filters + editing/prompts + utils/filters Phase 1 step 5: separate the Gemini-driven filter generator. The VideoEditor class moves to openshorts/editing/ai_filters.py; the two long Gemini prompt strings move to openshorts/editing/prompts.py as functions; the previously private filter helpers (sanitize_filter_string, enforce_zoompan_output_size, split_filter_chain) move to openshorts/utils/filters.py so the future motion-graphics and audio compositors can reuse them. VideoEditor still re-exposes the helpers as static/classmethods so the existing characterization tests pass unchanged. Shim at editor.py keeps `from editor import VideoEditor` working. Tests stay 62/62 green. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- editor.py | 383 +------------------------------ openshorts/editing/ai_filters.py | 235 +++++++++++++++++++ openshorts/editing/prompts.py | 130 +++++++++++ openshorts/utils/filters.py | 59 +++++ 4 files changed, 433 insertions(+), 374 deletions(-) create mode 100644 openshorts/editing/ai_filters.py create mode 100644 openshorts/editing/prompts.py create mode 100644 openshorts/utils/filters.py diff --git a/editor.py b/editor.py index 8a726dc0..1c463222 100644 --- a/editor.py +++ b/editor.py @@ -1,376 +1,11 @@ -import os -import json -import re -import subprocess -import time -from google import genai -from google.genai import types +"""Compat shim: re-exports openshorts.editing.ai_filters.VideoEditor at the original path. -class VideoEditor: - def __init__(self, api_key): - self.client = genai.Client(api_key=api_key) - self.model_name = "gemini-3-flash-preview" +This module was split into three files as part of the restructure: +- openshorts/editing/ai_filters.py (VideoEditor class) +- openshorts/editing/prompts.py (Gemini prompt templates) +- openshorts/utils/filters.py (shared FFmpeg filter helpers) - def upload_video(self, video_path): - """Uploads video to Gemini File API.""" - print(f"📤 Uploading {video_path} to Gemini...") - - # Ensure we are passing a path that exists - if not os.path.exists(video_path): - raise FileNotFoundError(f"Video file not found: {video_path}") - - # Using 'file' keyword instead of 'path' - try: - file_upload = self.client.files.upload(file=video_path) - except Exception as e: - print(f"❌ Gemini Upload Error: {e}") - raise e - - # Wait for processing - print("⏳ Waiting for video processing by Gemini...") - while True: - file_info = self.client.files.get(name=file_upload.name) - if file_info.state == "ACTIVE": - print("✅ Video processed and ready.") - return file_upload - elif file_info.state == "FAILED": - raise Exception("Video processing failed by Gemini.") - time.sleep(2) - - def 
get_ffmpeg_filter(self, video_file_obj, duration, fps=30, width=None, height=None, transcript=None): - """Asks Gemini for a raw FFmpeg filter string.""" - if width is None or height is None: - # Keep prompt usable even if caller didn't pass dimensions. - width, height = 1080, 1920 - - transcript_text = json.dumps(transcript) if transcript else "Not available." - - prompt = f""" - You are an expert FFmpeg video editor. Your task is to generate a complex video filter string to make a short video viral, BUT ONLY apply effects where they make sense contextually. - - Video Duration: {duration} seconds. - Video FPS: {fps} - Video Resolution (MUST KEEP EXACT): {width}x{height} - - TRANSCRIPT (Context of what is being said): - {transcript_text} - - Goal: Enhance the video with dynamic zooms, cuts (simulated with punch-ins), and visual effects to increase retention, but DO NOT overdo it. Random effects are bad. Contextual effects are good. - - Instructions: - 1. ANALYZE THE VIDEO AND TRANSCRIPT: Understand the mood, the pacing, and the key moments. - 2. APPLY EFFECTS ONLY WHEN RELEVANT: - - Use "punch-in" zooms (zoompan) to emphasize key points, jokes, or dramatic moments in the speech. - - slow zooms to face when the speaker is speaking - - Use visual effects (contrast, saturation, sharpness) to highlight mood changes or specific segments. - - If nothing significant is happening, keep it simple. It is BETTER to have no effect than a random/distracting one. - - Avoid constant motion if the speaker is delivering a serious or steady message. - 3. Create a single valid FFmpeg filter complex string (for the -vf flag). - 4. Use filters like `zoompan`, `eq` (contrast), `hue` (saturation/bw), `unsharp`. - 5. Pacing: Align effects with the rhythm of the speech (from transcript) or visual action. - 6. CRITICAL SYNTAX RULES: - - DO NOT use comparison operators like `<`, `>`, `<=`, `>=` anywhere. They frequently break FFmpeg expression parsing. 
- - USE FFmpeg expression FUNCTIONS instead: - - `between(x,a,b)` - - `lt(x,y)`, `lte(x,y)`, `gt(x,y)`, `gte(x,y)` - - `if(cond,then,else)` - - Always wrap expression values in single quotes: `z='...'`, `x='...'`, `y='...'`, `enable='...'`. - - - FOR `zoompan`: - - Prefer `on` (output frame index) to avoid time-variable quirks. - - Convert seconds to frames using FPS={fps}: `frame = seconds * {fps}`. - - Use `between(on, startFrame, endFrame)` for segmenting and pacing. - - Example: - `zoompan=z='1.1*between(on,0,75)+1.3*between(on,76,150)+1.15*between(on,151,300)+1.2*gte(on,301)'` - - ALWAYS set zoompan output size to EXACT `{width}x{height}` using `s={width}x{height}`. - - ALWAYS set `fps={fps}` and `d=1`. - - DO NOT use `scale`, `crop`, `pad` unless you keep EXACT `{width}x{height}` (no aspect ratio changes). - - - FOR `eq`, `hue`, `curves`, `unsharp` (Visual Effects): - - **DO NOT** use dynamic expressions for parameter values (e.g. `contrast='1+0.5*t'`). - - **USE TIMELINE EDITING** via the `enable` option. - - Create MULTIPLE filter instances for different time ranges. - - **SYNTAX FOR ENABLE:** - - **USE** `between(t,start,end)` for clarity and robustness. - - **USE** single quotes around the enable expression. - - **Example:** `eq=contrast=1.2:enable='between(t,0,3)'` - - **Example:** `hue=s=0:enable='between(t,10,12)'` - - This is much safer and robust than boolean multiplication. - - Constraints: - - Output JSON with a single key: "filter_string". - - The value must be the RAW filter string ready to be passed to `-vf`. - - OUTPUT MUST KEEP EXACT RESOLUTION AND ASPECT RATIO: {width}x{height}. - - Do NOT output 1280x720 or 1080x1080 unless the input is exactly that. - - IMPORTANT: Do NOT include the `-vf` flag itself, just the filter content. - - IMPORTANT: Ensure syntax is correct for FFmpeg. - - Output JSON: - {{ - "filter_string": "..." 
- }} - """ - - print("🤖 Asking Gemini for FFmpeg filter...") - response = self.client.models.generate_content( - model=self.model_name, - contents=[video_file_obj, prompt], - config=types.GenerateContentConfig( - response_mime_type="application/json" - ) - ) - - print(f"🔍 DEBUG: Gemini Raw Response:\n{response.text}") - - try: - # Clean response text (remove potential markdown blocks) - text = response.text - if text.startswith("```json"): - text = text[7:] - elif text.startswith("```"): - text = text[3:] - - if text.endswith("```"): - text = text[:-3] - - text = text.strip() - - # Additional cleanup for potential trailing characters outside JSON - # Find the first '{' and last '}' - start_idx = text.find('{') - end_idx = text.rfind('}') - - if start_idx != -1 and end_idx != -1: - text = text[start_idx:end_idx+1] - - print(f"🔍 DEBUG: Cleaned JSON Text:\n{text}") - - return json.loads(text) - except json.JSONDecodeError: - print(f"❌ Failed to parse JSON: {response.text}") - return None - - def get_effects_config(self, video_file_obj, duration, fps=30, width=None, height=None, transcript=None): - """Asks Gemini for a structured EffectsConfig JSON for Remotion rendering.""" - if width is None or height is None: - width, height = 1080, 1920 - - transcript_text = json.dumps(transcript) if transcript else "Not available." - - prompt = f""" - You are an expert video editor analyzing a video and its transcript to generate dynamic visual effects for a Remotion-based renderer. - - Video Duration: {duration} seconds. - Video FPS: {fps} - Video Resolution: {width}x{height} - - TRANSCRIPT (Context of what is being said): - {transcript_text} - - Your task is to produce a structured JSON describing time-based effect segments that cover the FULL video duration. - - Each segment has these fields: - - "startSec" (number): Start time in seconds. - - "endSec" (number): End time in seconds. - - "zoom" (number): Zoom level. 1.0 = no zoom, max 1.5. 
Use subtle values like 1.05-1.2 for most cases. - - "zoomCenterX" (number): Horizontal focus point for zoom, 0.0 (left) to 1.0 (right). 0.5 = center. - - "zoomCenterY" (number): Vertical focus point for zoom, 0.0 (top) to 1.0 (bottom). 0.5 = center. - - "brightness" (number): Brightness multiplier. 1.0 = normal. Range 0.8-1.2. - - "contrast" (number): Contrast multiplier. 1.0 = normal. Range 0.8-1.3. - - "saturate" (number): Saturation multiplier. 1.0 = normal. Range 0.8-1.3. - - Instructions: - 1. ANALYZE the video content and transcript to understand mood, pacing, and key moments. - 2. Apply CONTEXTUAL effects aligned with speech and action: - - Use slow, subtle zooms toward the speaker's face during speaking moments. - - Emphasize key moments, punchlines, or dramatic beats with slightly stronger zoom or contrast. - - Keep transitions smooth — avoid jarring jumps between segments. - - If nothing significant is happening, keep values at defaults (zoom 1.0, all multipliers 1.0). - 3. Segments MUST cover the entire video duration from 0 to {duration} seconds with no gaps. - 4. Prefer fewer, longer segments with gradual changes over many rapid short segments. - 5. Output ONLY valid JSON, no explanations. 
- - Output format: - {{ - "segments": [ - {{ - "startSec": 0, - "endSec": 3.5, - "zoom": 1.0, - "zoomCenterX": 0.5, - "zoomCenterY": 0.5, - "brightness": 1.0, - "contrast": 1.0, - "saturate": 1.0 - }} - ] - }} - """ - - print("🤖 Asking Gemini for Remotion effects config...") - response = self.client.models.generate_content( - model=self.model_name, - contents=[video_file_obj, prompt], - config=types.GenerateContentConfig( - response_mime_type="application/json" - ) - ) - - print(f"🔍 DEBUG: Gemini Raw Response:\n{response.text}") - - try: - # Clean response text (remove potential markdown blocks) - text = response.text - if text.startswith("```json"): - text = text[7:] - elif text.startswith("```"): - text = text[3:] - - if text.endswith("```"): - text = text[:-3] - - text = text.strip() - - # Find the first '{' and last '}' - start_idx = text.find('{') - end_idx = text.rfind('}') - - if start_idx != -1 and end_idx != -1: - text = text[start_idx:end_idx+1] - - print(f"🔍 DEBUG: Cleaned JSON Text:\n{text}") - - return json.loads(text) - except json.JSONDecodeError: - print(f"❌ Failed to parse effects config JSON: {response.text}") - return None - - @staticmethod - def _split_filter_chain(filter_string: str) -> list[str]: - """Split a -vf filter chain on commas, respecting single-quoted substrings.""" - parts: list[str] = [] - start = 0 - in_quote = False - for i, ch in enumerate(filter_string): - if ch == "'": - in_quote = not in_quote - elif ch == "," and not in_quote: - parts.append(filter_string[start:i]) - start = i + 1 - parts.append(filter_string[start:]) - return parts - - @classmethod - def _enforce_zoompan_output_size(cls, filter_string: str, width: int, height: int) -> str: - """Force any zoompan filter to output the same geometry as the input clip.""" - parts = cls._split_filter_chain(filter_string) - out_parts: list[str] = [] - for part in parts: - if "zoompan=" in part: - # Force s=WxH inside zoompan options (digitsxdigits only). 
- if re.search(r":s=\d+x\d+", part): - part = re.sub(r":s=\d+x\d+", f":s={width}x{height}", part) - else: - part = f"{part}:s={width}x{height}" - out_parts.append(part) - return ",".join(out_parts) - - @staticmethod - def _sanitize_filter_string(filter_string: str) -> str: - """ - Best-effort sanitizer for Gemini-generated FFmpeg expressions. - Converts comparison operators (t<3, on>=75, etc.) into FFmpeg expr functions (lt(), gte(), ...), - which are far more reliably parsed across FFmpeg builds. - """ - s = filter_string - - # Order matters: handle >= / <= before > / < - patterns: list[tuple[re.Pattern[str], str]] = [ - (re.compile(r"(?=\s*(-?\d+(?:\.\d+)?)"), r"gte(\1,\2)"), - (re.compile(r"(?\s*(-?\d+(?:\.\d+)?)"), r"gt(\1,\2)"), - (re.compile(r"(?=75) before executing FFmpeg. - sanitized = self._sanitize_filter_string(filter_string) - if sanitized != filter_string: - print("🧼 Sanitized AI Filter (converted comparisons to lt/lte/gt/gte functions)") - print(f"🧼 Before: {filter_string}") - print(f"🧼 After: {sanitized}") - filter_string = sanitized - - # Enforce zoompan output size to preserve aspect ratio / resolution. - if w and h: - enforced = self._enforce_zoompan_output_size(filter_string, w, h) - if enforced != filter_string: - print(f"📐 Enforced zoompan output size to {w}x{h}") - filter_string = enforced - - # Ensure square pixels (avoid weird display stretching in some players). 
- if "setsar=" not in filter_string: - filter_string = f"{filter_string},setsar=1" - - print(f"🎬 Executing AI Filter: {filter_string}") - - cmd = [ - 'ffmpeg', '-y', - '-i', input_path, - '-vf', filter_string, - '-c:v', 'libx264', '-preset', 'fast', '-crf', '22', - '-c:a', 'copy', - output_path - ] - - # Use explicit environment with UTF-8 to avoid ascii errors in subprocess - env = os.environ.copy() - # On some minimal docker images, we need to ensure we use a UTF-8 locale - # Try C.UTF-8 first, fallback to en_US.UTF-8 if available, but C.UTF-8 is usually safer for minimal - env["LANG"] = "C.UTF-8" - env["LC_ALL"] = "C.UTF-8" - - try: - # We must encode arguments if filesystem is ascii but we have unicode chars - # But subprocess in Python 3 handles unicode args by encoding them with os.fsencode(). - # If sys.getfilesystemencoding() is ascii, this fails. - # We can't change fs encoding at runtime easily. - # Workaround: pass bytes directly? subprocess allows bytes in args. - - # Convert command elements to bytes assuming utf-8 if they are strings - cmd_bytes = [] - for arg in cmd: - if isinstance(arg, str): - cmd_bytes.append(arg.encode('utf-8')) - else: - cmd_bytes.append(arg) - - subprocess.run(cmd_bytes, check=True, env=env) - except subprocess.CalledProcessError as e: - print(f"❌ FFmpeg failed: {e}") - raise e - -if __name__ == "__main__": - pass \ No newline at end of file +New code should import from those modules directly. This shim keeps existing +`from editor import VideoEditor` calls working. +""" +from openshorts.editing.ai_filters import VideoEditor # noqa: F401 diff --git a/openshorts/editing/ai_filters.py b/openshorts/editing/ai_filters.py new file mode 100644 index 00000000..7199e265 --- /dev/null +++ b/openshorts/editing/ai_filters.py @@ -0,0 +1,235 @@ +"""VideoEditor: Gemini-driven FFmpeg filter generation and application. 
+ +The shared filter helpers (sanitize_filter_string, enforce_zoompan_output_size) +live in openshorts/utils/filters.py and are exposed as static/classmethods on +VideoEditor for backwards compatibility. +""" +import os +import json +import subprocess +import time + +from google import genai +from google.genai import types + +from openshorts.editing.prompts import ( + build_ffmpeg_filter_prompt, + build_effects_config_prompt, +) +from openshorts.utils.filters import ( + split_filter_chain as _split_filter_chain_fn, + enforce_zoompan_output_size as _enforce_zoompan_output_size_fn, + sanitize_filter_string as _sanitize_filter_string_fn, +) + + +class VideoEditor: + def __init__(self, api_key): + self.client = genai.Client(api_key=api_key) + self.model_name = "gemini-3-flash-preview" + + def upload_video(self, video_path): + """Uploads video to Gemini File API.""" + print(f"📤 Uploading {video_path} to Gemini...") + + # Ensure we are passing a path that exists + if not os.path.exists(video_path): + raise FileNotFoundError(f"Video file not found: {video_path}") + + # Using 'file' keyword instead of 'path' + try: + file_upload = self.client.files.upload(file=video_path) + except Exception as e: + print(f"❌ Gemini Upload Error: {e}") + raise e + + # Wait for processing + print("⏳ Waiting for video processing by Gemini...") + while True: + file_info = self.client.files.get(name=file_upload.name) + if file_info.state == "ACTIVE": + print("✅ Video processed and ready.") + return file_upload + elif file_info.state == "FAILED": + raise Exception("Video processing failed by Gemini.") + time.sleep(2) + + def get_ffmpeg_filter(self, video_file_obj, duration, fps=30, width=None, height=None, transcript=None): + """Asks Gemini for a raw FFmpeg filter string.""" + if width is None or height is None: + # Keep prompt usable even if caller didn't pass dimensions. 
+ width, height = 1080, 1920 + + prompt = build_ffmpeg_filter_prompt(duration, fps, width, height, transcript) + + print("🤖 Asking Gemini for FFmpeg filter...") + response = self.client.models.generate_content( + model=self.model_name, + contents=[video_file_obj, prompt], + config=types.GenerateContentConfig( + response_mime_type="application/json" + ) + ) + + print(f"🔍 DEBUG: Gemini Raw Response:\n{response.text}") + + try: + # Clean response text (remove potential markdown blocks) + text = response.text + if text.startswith("```json"): + text = text[7:] + elif text.startswith("```"): + text = text[3:] + + if text.endswith("```"): + text = text[:-3] + + text = text.strip() + + # Additional cleanup for potential trailing characters outside JSON + # Find the first '{' and last '}' + start_idx = text.find('{') + end_idx = text.rfind('}') + + if start_idx != -1 and end_idx != -1: + text = text[start_idx:end_idx+1] + + print(f"🔍 DEBUG: Cleaned JSON Text:\n{text}") + + return json.loads(text) + except json.JSONDecodeError: + print(f"❌ Failed to parse JSON: {response.text}") + return None + + def get_effects_config(self, video_file_obj, duration, fps=30, width=None, height=None, transcript=None): + """Asks Gemini for a structured EffectsConfig JSON for Remotion rendering.""" + if width is None or height is None: + width, height = 1080, 1920 + + prompt = build_effects_config_prompt(duration, fps, width, height, transcript) + + print("🤖 Asking Gemini for Remotion effects config...") + response = self.client.models.generate_content( + model=self.model_name, + contents=[video_file_obj, prompt], + config=types.GenerateContentConfig( + response_mime_type="application/json" + ) + ) + + print(f"🔍 DEBUG: Gemini Raw Response:\n{response.text}") + + try: + # Clean response text (remove potential markdown blocks) + text = response.text + if text.startswith("```json"): + text = text[7:] + elif text.startswith("```"): + text = text[3:] + + if text.endswith("```"): + text = text[:-3] + 
+ text = text.strip() + + # Find the first '{' and last '}' + start_idx = text.find('{') + end_idx = text.rfind('}') + + if start_idx != -1 and end_idx != -1: + text = text[start_idx:end_idx+1] + + print(f"🔍 DEBUG: Cleaned JSON Text:\n{text}") + + return json.loads(text) + except json.JSONDecodeError: + print(f"❌ Failed to parse effects config JSON: {response.text}") + return None + + @staticmethod + def _split_filter_chain(filter_string): + return _split_filter_chain_fn(filter_string) + + @classmethod + def _enforce_zoompan_output_size(cls, filter_string, width, height): + return _enforce_zoompan_output_size_fn(filter_string, width, height) + + @staticmethod + def _sanitize_filter_string(filter_string): + return _sanitize_filter_string_fn(filter_string) + + def apply_edits(self, input_path, output_path, filter_data): + """Executes FFmpeg with the generated filter.""" + + if not filter_data or "filter_string" not in filter_data: + print("⚠️ No filter string found. Copying original.") + subprocess.run(['ffmpeg', '-y', '-i', input_path, '-c', 'copy', output_path]) + return + + filter_string = filter_data["filter_string"] + + # Get input dimensions so we can enforce geometry (avoid broken aspect ratios). + try: + probe_cmd = ['ffprobe', '-v', 'error', '-select_streams', 'v:0', '-show_entries', 'stream=width,height', '-of', 'csv=s=x:p=0', input_path] + res_out = subprocess.check_output(probe_cmd, env={**os.environ, "LANG": "C.UTF-8"}).decode().strip() + w, h = map(int, res_out.split('x')) + except Exception as e: + print(f"⚠️ Could not probe resolution: {e}") + w, h = None, None + + # Sanitize common expression pitfalls (e.g., t<3 / on>=75) before executing FFmpeg. 
+ sanitized = _sanitize_filter_string_fn(filter_string) + if sanitized != filter_string: + print("🧼 Sanitized AI Filter (converted comparisons to lt/lte/gt/gte functions)") + print(f"🧼 Before: {filter_string}") + print(f"🧼 After: {sanitized}") + filter_string = sanitized + + # Enforce zoompan output size to preserve aspect ratio / resolution. + if w and h: + enforced = _enforce_zoompan_output_size_fn(filter_string, w, h) + if enforced != filter_string: + print(f"📐 Enforced zoompan output size to {w}x{h}") + filter_string = enforced + + # Ensure square pixels (avoid weird display stretching in some players). + if "setsar=" not in filter_string: + filter_string = f"{filter_string},setsar=1" + + print(f"🎬 Executing AI Filter: {filter_string}") + + cmd = [ + 'ffmpeg', '-y', + '-i', input_path, + '-vf', filter_string, + '-c:v', 'libx264', '-preset', 'fast', '-crf', '22', + '-c:a', 'copy', + output_path + ] + + # Use explicit environment with UTF-8 to avoid ascii errors in subprocess + env = os.environ.copy() + # On some minimal docker images, we need to ensure we use a UTF-8 locale + # Try C.UTF-8 first, fallback to en_US.UTF-8 if available, but C.UTF-8 is usually safer for minimal + env["LANG"] = "C.UTF-8" + env["LC_ALL"] = "C.UTF-8" + + try: + # We must encode arguments if filesystem is ascii but we have unicode chars + # But subprocess in Python 3 handles unicode args by encoding them with os.fsencode(). + # If sys.getfilesystemencoding() is ascii, this fails. + # We can't change fs encoding at runtime easily. + # Workaround: pass bytes directly? subprocess allows bytes in args. 
+ + # Convert command elements to bytes assuming utf-8 if they are strings + cmd_bytes = [] + for arg in cmd: + if isinstance(arg, str): + cmd_bytes.append(arg.encode('utf-8')) + else: + cmd_bytes.append(arg) + + subprocess.run(cmd_bytes, check=True, env=env) + except subprocess.CalledProcessError as e: + print(f"❌ FFmpeg failed: {e}") + raise e diff --git a/openshorts/editing/prompts.py b/openshorts/editing/prompts.py new file mode 100644 index 00000000..9cf3b351 --- /dev/null +++ b/openshorts/editing/prompts.py @@ -0,0 +1,130 @@ +"""Gemini prompt templates for AI video-effect generation. + +Kept as functions returning rendered strings with width/height/fps/duration/transcript +substituted in. The matching call sites in ai_filters.py each pass these args through. +""" +import json + + +def build_ffmpeg_filter_prompt(duration, fps, width, height, transcript): + """Prompt for raw FFmpeg filter_complex string (used by /api/edit).""" + transcript_text = json.dumps(transcript) if transcript else "Not available." + return f""" + You are an expert FFmpeg video editor. Your task is to generate a complex video filter string to make a short video viral, BUT ONLY apply effects where they make sense contextually. + + Video Duration: {duration} seconds. + Video FPS: {fps} + Video Resolution (MUST KEEP EXACT): {width}x{height} + + TRANSCRIPT (Context of what is being said): + {transcript_text} + + Goal: Enhance the video with dynamic zooms, cuts (simulated with punch-ins), and visual effects to increase retention, but DO NOT overdo it. Random effects are bad. Contextual effects are good. + + Instructions: + 1. ANALYZE THE VIDEO AND TRANSCRIPT: Understand the mood, the pacing, and the key moments. + 2. APPLY EFFECTS ONLY WHEN RELEVANT: + - Use "punch-in" zooms (zoompan) to emphasize key points, jokes, or dramatic moments in the speech. 
+ - slow zooms to face when the speaker is speaking + - Use visual effects (contrast, saturation, sharpness) to highlight mood changes or specific segments. + - If nothing significant is happening, keep it simple. It is BETTER to have no effect than a random/distracting one. + - Avoid constant motion if the speaker is delivering a serious or steady message. + 3. Create a single valid FFmpeg filter complex string (for the -vf flag). + 4. Use filters like `zoompan`, `eq` (contrast), `hue` (saturation/bw), `unsharp`. + 5. Pacing: Align effects with the rhythm of the speech (from transcript) or visual action. + 6. CRITICAL SYNTAX RULES: + - DO NOT use comparison operators like `<`, `>`, `<=`, `>=` anywhere. They frequently break FFmpeg expression parsing. + - USE FFmpeg expression FUNCTIONS instead: + - `between(x,a,b)` + - `lt(x,y)`, `lte(x,y)`, `gt(x,y)`, `gte(x,y)` + - `if(cond,then,else)` + - Always wrap expression values in single quotes: `z='...'`, `x='...'`, `y='...'`, `enable='...'`. + + - FOR `zoompan`: + - Prefer `on` (output frame index) to avoid time-variable quirks. + - Convert seconds to frames using FPS={fps}: `frame = seconds * {fps}`. + - Use `between(on, startFrame, endFrame)` for segmenting and pacing. + - Example: + `zoompan=z='1.1*between(on,0,75)+1.3*between(on,76,150)+1.15*between(on,151,300)+1.2*gte(on,301)'` + - ALWAYS set zoompan output size to EXACT `{width}x{height}` using `s={width}x{height}`. + - ALWAYS set `fps={fps}` and `d=1`. + - DO NOT use `scale`, `crop`, `pad` unless you keep EXACT `{width}x{height}` (no aspect ratio changes). + + - FOR `eq`, `hue`, `curves`, `unsharp` (Visual Effects): + - **DO NOT** use dynamic expressions for parameter values (e.g. `contrast='1+0.5*t'`). + - **USE TIMELINE EDITING** via the `enable` option. + - Create MULTIPLE filter instances for different time ranges. + - **SYNTAX FOR ENABLE:** + - **USE** `between(t,start,end)` for clarity and robustness. + - **USE** single quotes around the enable expression. 
+ - **Example:** `eq=contrast=1.2:enable='between(t,0,3)'` + - **Example:** `hue=s=0:enable='between(t,10,12)'` + - This is much safer and robust than boolean multiplication. + + Constraints: + - Output JSON with a single key: "filter_string". + - The value must be the RAW filter string ready to be passed to `-vf`. + - OUTPUT MUST KEEP EXACT RESOLUTION AND ASPECT RATIO: {width}x{height}. + - Do NOT output 1280x720 or 1080x1080 unless the input is exactly that. + - IMPORTANT: Do NOT include the `-vf` flag itself, just the filter content. + - IMPORTANT: Ensure syntax is correct for FFmpeg. + + Output JSON: + {{ + "filter_string": "..." + }} + """ + + +def build_effects_config_prompt(duration, fps, width, height, transcript): + """Prompt for structured EffectsConfig JSON (used by Remotion renderer).""" + transcript_text = json.dumps(transcript) if transcript else "Not available." + return f""" + You are an expert video editor analyzing a video and its transcript to generate dynamic visual effects for a Remotion-based renderer. + + Video Duration: {duration} seconds. + Video FPS: {fps} + Video Resolution: {width}x{height} + + TRANSCRIPT (Context of what is being said): + {transcript_text} + + Your task is to produce a structured JSON describing time-based effect segments that cover the FULL video duration. + + Each segment has these fields: + - "startSec" (number): Start time in seconds. + - "endSec" (number): End time in seconds. + - "zoom" (number): Zoom level. 1.0 = no zoom, max 1.5. Use subtle values like 1.05-1.2 for most cases. + - "zoomCenterX" (number): Horizontal focus point for zoom, 0.0 (left) to 1.0 (right). 0.5 = center. + - "zoomCenterY" (number): Vertical focus point for zoom, 0.0 (top) to 1.0 (bottom). 0.5 = center. + - "brightness" (number): Brightness multiplier. 1.0 = normal. Range 0.8-1.2. + - "contrast" (number): Contrast multiplier. 1.0 = normal. Range 0.8-1.3. + - "saturate" (number): Saturation multiplier. 1.0 = normal. Range 0.8-1.3. 
+ + Instructions: + 1. ANALYZE the video content and transcript to understand mood, pacing, and key moments. + 2. Apply CONTEXTUAL effects aligned with speech and action: + - Use slow, subtle zooms toward the speaker's face during speaking moments. + - Emphasize key moments, punchlines, or dramatic beats with slightly stronger zoom or contrast. + - Keep transitions smooth — avoid jarring jumps between segments. + - If nothing significant is happening, keep values at defaults (zoom 1.0, all multipliers 1.0). + 3. Segments MUST cover the entire video duration from 0 to {duration} seconds with no gaps. + 4. Prefer fewer, longer segments with gradual changes over many rapid short segments. + 5. Output ONLY valid JSON, no explanations. + + Output format: + {{ + "segments": [ + {{ + "startSec": 0, + "endSec": 3.5, + "zoom": 1.0, + "zoomCenterX": 0.5, + "zoomCenterY": 0.5, + "brightness": 1.0, + "contrast": 1.0, + "saturate": 1.0 + }} + ] + }} + """ diff --git a/openshorts/utils/filters.py b/openshorts/utils/filters.py new file mode 100644 index 00000000..9ea52b85 --- /dev/null +++ b/openshorts/utils/filters.py @@ -0,0 +1,59 @@ +"""Shared FFmpeg filter helpers: chain splitting, sanitization, zoompan size enforcement. + +These were originally private statics on editor.VideoEditor; moved here so the +motion-graphics and audio compositors can reuse them without importing the +editing module. VideoEditor still re-exposes them as static/classmethods for +backwards compatibility. 
+""" +import re + + +def split_filter_chain(filter_string: str) -> list: + """Split a -vf filter chain on commas, respecting single-quoted substrings.""" + parts = [] + start = 0 + in_quote = False + for i, ch in enumerate(filter_string): + if ch == "'": + in_quote = not in_quote + elif ch == "," and not in_quote: + parts.append(filter_string[start:i]) + start = i + 1 + parts.append(filter_string[start:]) + return parts + + +def enforce_zoompan_output_size(filter_string: str, width: int, height: int) -> str: + """Force any zoompan filter to output the same geometry as the input clip.""" + parts = split_filter_chain(filter_string) + out_parts = [] + for part in parts: + if "zoompan=" in part: + # Force s=WxH inside zoompan options (digitsxdigits only). + if re.search(r":s=\d+x\d+", part): + part = re.sub(r":s=\d+x\d+", f":s={width}x{height}", part) + else: + part = f"{part}:s={width}x{height}" + out_parts.append(part) + return ",".join(out_parts) + + +# Order matters: handle >= / <= before > / < +_COMPARISON_PATTERNS = [ + (re.compile(r"(?=\s*(-?\d+(?:\.\d+)?)"), r"gte(\1,\2)"), + (re.compile(r"(?\s*(-?\d+(?:\.\d+)?)"), r"gt(\1,\2)"), + (re.compile(r"(? str: + """ + Best-effort sanitizer for Gemini-generated FFmpeg expressions. + Converts comparison operators (t<3, on>=75, etc.) into FFmpeg expr functions + (lt(), gte(), ...), which are far more reliably parsed across FFmpeg builds. + """ + s = filter_string + for pat, repl in _COMPARISON_PATTERNS: + s = pat.sub(repl, s) + return s From 0546eb034c53c129fa018139849c8cf4662311a2 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse Date: Tue, 19 May 2026 15:52:36 -0400 Subject: [PATCH 08/43] chore(restructure): split thumbnail -> thumbnails/{titles,images,descriptions}.py Phase 1 step 6: separate the thumbnail workflow into three modules. Each concern is < 100 lines and independently testable. Shim at the old path preserves existing imports. Tests stay 62/62 green. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- openshorts/thumbnails/descriptions.py | 65 +++++ openshorts/thumbnails/images.py | 108 ++++++++ openshorts/thumbnails/titles.py | 170 +++++++++++++ thumbnail.py | 345 +------------------------- 4 files changed, 356 insertions(+), 332 deletions(-) create mode 100644 openshorts/thumbnails/descriptions.py create mode 100644 openshorts/thumbnails/images.py create mode 100644 openshorts/thumbnails/titles.py diff --git a/openshorts/thumbnails/descriptions.py b/openshorts/thumbnails/descriptions.py new file mode 100644 index 00000000..723b2a58 --- /dev/null +++ b/openshorts/thumbnails/descriptions.py @@ -0,0 +1,65 @@ +"""YouTube description + chapter-marker generation from transcript segments.""" + +from google import genai + + +def generate_youtube_description(api_key, title, transcript_segments, language, video_duration): + """ + Uses Gemini to generate a YouTube description with chapter markers from transcript segments. + Returns: { "description": "full description text with chapters" } + """ + client = genai.Client(api_key=api_key) + + # Format segments for the prompt + formatted_segments = [] + for seg in transcript_segments: + start = seg.get("start", 0) + mins = int(start // 60) + secs = int(start % 60) + timestamp = f"{mins}:{secs:02d}" + formatted_segments.append(f"[{timestamp}] {seg.get('text', '').strip()}") + + segments_text = "\n".join(formatted_segments) + + # Format total duration + dur_mins = int(video_duration // 60) + dur_secs = int(video_duration % 60) + duration_str = f"{dur_mins}:{dur_secs:02d}" + + prompt = f"""You are a YouTube SEO expert. Generate a complete YouTube video description for the following video. + +VIDEO TITLE: "{title}" +VIDEO LANGUAGE: {language} +VIDEO DURATION: {duration_str} + +TRANSCRIPT WITH TIMESTAMPS: +{segments_text} + +REQUIREMENTS: +1. Write the description in the SAME LANGUAGE as the video ({language}) +2. Start with a compelling 2-3 sentence summary/hook +3. 
Add relevant CTAs (subscribe, like, comment) +4. Generate YouTube CHAPTERS based on the transcript timestamps: + - First chapter MUST start at 0:00 + - Minimum 3 chapters, each at least 10 seconds apart + - Chapter titles should be concise and descriptive + - Format: 0:00 Chapter Title + - Place chapters in their own section with a blank line before and after +5. Add 5-10 relevant hashtags at the end +6. Keep the total description under 5000 characters + +OUTPUT: Return ONLY the description text (no JSON wrapper, no markdown code blocks). The description should be ready to paste directly into YouTube.""" + + print("🤖 [Thumbnail] Generating YouTube description with chapters...") + response = client.models.generate_content( + model="gemini-2.5-flash", + contents=[prompt], + ) + + description = response.text.strip() + # Clean up any accidental markdown wrappers + if description.startswith("```"): + lines = description.split("\n") + description = "\n".join(lines[1:-1] if lines[-1].strip() == "```" else lines[1:]) + + return {"description": description} diff --git a/openshorts/thumbnails/images.py b/openshorts/thumbnails/images.py new file mode 100644 index 00000000..e777e468 --- /dev/null +++ b/openshorts/thumbnails/images.py @@ -0,0 +1,108 @@ +"""Thumbnail image generation via Gemini multimodal image preview model.""" + +import os +from google import genai +from google.genai import types +from PIL import Image + + +def generate_thumbnail(api_key, title, session_id, face_image_path=None, bg_image_path=None, extra_prompt="", count=3, video_context=""): + """ + Generates YouTube thumbnails using Gemini image generation. + Returns list of saved image paths (relative URLs). 
+ """ + client = genai.Client(api_key=api_key) + + output_dir = os.path.join("output", "thumbnails", session_id) + os.makedirs(output_dir, exist_ok=True) + + prompt_parts = [] + + # Add face image if provided + if face_image_path and os.path.exists(face_image_path): + face_img = Image.open(face_image_path) + prompt_parts.append(face_img) + + # Add background image if provided + if bg_image_path and os.path.exists(bg_image_path): + bg_img = Image.open(bg_image_path) + prompt_parts.append(bg_img) + + # Build video context block + context_block = "" + if video_context: + context_block = f""" +VIDEO CONTEXT (use this to understand the video and design a relevant thumbnail): +{video_context} +""" + + # Build extra instructions block (high priority) + extra_block = "" + if extra_prompt: + extra_block = f""" +⚠️ MANDATORY USER INSTRUCTIONS (MUST follow these exactly — they override any default behavior): +{extra_prompt} +""" + + text_prompt = f"""Generate a professional, eye-catching YouTube thumbnail image. 
+ +VIDEO TITLE (for reference — do NOT put the full title on the thumbnail): "{title}" +{context_block} +TEXT ON THE THUMBNAIL: +- Based on the title AND the video context, create a SHORT visual hook: 1 to 5 words maximum +- It should capture the core emotion, surprise, or promise of the video +- The thumbnail text should COMPLEMENT the YouTube title (which appears below), not repeat it +- Examples: "$10K EN 30 DÍAS", "ESTO FUNCIONA", "NO LO SABÍAS", "GRATIS 🔥" +- Use ALL CAPS for maximum impact, split into 2-3 lines +{extra_block} +DESIGN REQUIREMENTS: +- The text MUST be large, bold, and high-contrast (readable at small sizes) +- Use vibrant, eye-catching colors that match the video's mood +- Professional YouTube thumbnail aesthetic +- Clean composition — text and face/subject as clear focal points +- NO clutter, NO small text, NO watermarks""" + + if face_image_path and os.path.exists(face_image_path): + text_prompt += "\n- Include the provided face/person prominently with an exaggerated expression (surprise, excitement, shock)" + + if bg_image_path and os.path.exists(bg_image_path): + text_prompt += "\n- Use the provided background image as the base/backdrop" + + prompt_parts.append(text_prompt) + + thumbnails = [] + last_error = None + for i in range(count): + print(f"🎨 [Thumbnail] Generating thumbnail {i + 1}/{count}...") + try: + response = client.models.generate_content( + model="gemini-3.1-flash-image-preview", + contents=prompt_parts, + config=types.GenerateContentConfig( + response_modalities=["TEXT", "IMAGE"], + image_config=types.ImageConfig( + aspect_ratio="16:9", + image_size="2K" + ) + ) + ) + + for part in response.parts: + if part.text is not None: + print(f"📝 [Thumbnail] Gemini text: {part.text}") + elif image := part.as_image(): + filename = f"thumb_{i + 1}.jpg" + filepath = os.path.join(output_dir, filename) + image.save(filepath) + thumbnails.append(f"/thumbnails/{session_id}/{filename}") + print(f"✅ [Thumbnail] Saved: {filepath}") + break + + 
except Exception as e: + last_error = str(e) + print(f"❌ [Thumbnail] Generation {i + 1} failed: {e}") + + if not thumbnails and last_error: + raise RuntimeError(f"All thumbnail generations failed. Last error: {last_error}") + + return thumbnails diff --git a/openshorts/thumbnails/titles.py b/openshorts/thumbnails/titles.py new file mode 100644 index 00000000..125f8f29 --- /dev/null +++ b/openshorts/thumbnails/titles.py @@ -0,0 +1,170 @@ +"""Gemini-driven viral title generation and conversational refinement.""" + +import json +import time +from google import genai +from google.genai import types + + +def analyze_video_for_titles(api_key, video_path, transcript=None): + """ + Transcribes a video and uses Gemini to suggest viral YouTube titles. + If transcript is provided, skips Whisper transcription. + Returns: { "titles": [...], "transcript_summary": "...", "language": "...", "segments": [...], "video_duration": ... } + """ + if transcript is None: + from main import transcribe_video + print("🎬 [Thumbnail] Transcribing video...") + transcript = transcribe_video(video_path) + else: + print("🎬 [Thumbnail] Using pre-computed transcript (Whisper already done)...") + + print("📤 [Thumbnail] Uploading video to Gemini...") + client = genai.Client(api_key=api_key) + + file_upload = client.files.upload(file=video_path) + while True: + file_info = client.files.get(name=file_upload.name) + if file_info.state == "ACTIVE": + break + elif file_info.state == "FAILED": + raise Exception("Video processing failed by Gemini.") + time.sleep(2) + + prompt = f"""You are a YouTube title expert who creates viral, click-worthy titles. + +Analyze this video and its transcript, then suggest 10 YouTube titles that would maximize CTR (click-through rate). 
+ +TRANSCRIPT: +{transcript['text']} + +RULES: +- Titles must be under 70 characters +- Use power words, curiosity gaps, and emotional triggers +- Mix styles: how-to, listicle, story-driven, controversial, question-based +- Make them specific to the actual content, not generic +- Include numbers where appropriate +- Consider the language of the video (detected: {transcript['language']}) +- Titles should be in the SAME LANGUAGE as the video transcript + +Also provide a brief summary of the video content (2-3 sentences). + +After generating all 10 titles, pick the TOP 2 you most recommend and explain concisely WHY (CTR potential, emotional hook, uniqueness, etc.). Reference them by their 0-based index in the titles array. + +OUTPUT JSON: +{{ + "titles": ["title1", "title2", ...], + "transcript_summary": "Brief summary of the video content...", + "language": "{transcript['language']}", + "recommended": [ + {{"index": 0, "reason": "Why this title is best..."}}, + {{"index": 3, "reason": "Why this title is second best..."}} + ] +}}""" + + print("🤖 [Thumbnail] Asking Gemini for title suggestions...") + response = client.models.generate_content( + model="gemini-2.5-flash", + contents=[file_upload, prompt], + config=types.GenerateContentConfig( + response_mime_type="application/json" + ) + ) + + # Extract segments and duration from transcript for later use + segments = transcript.get("segments", []) + video_duration = segments[-1]["end"] if segments else 0 + + try: + text = response.text.strip() + if text.startswith("```json"): + text = text[7:] + if text.startswith("```"): + text = text[3:] + if text.endswith("```"): + text = text[:-3] + text = text.strip() + + start_idx = text.find('{') + end_idx = text.rfind('}') + if start_idx != -1 and end_idx != -1: + text = text[start_idx:end_idx + 1] + + result = json.loads(text) + result["transcript_summary"] = result.get("transcript_summary", "") + result["language"] = result.get("language", transcript["language"]) + 
result["segments"] = segments + result["video_duration"] = video_duration + return result + except json.JSONDecodeError: + print(f"❌ [Thumbnail] Failed to parse titles JSON: {response.text}") + return { + "titles": ["Could not generate titles - please try again"], + "transcript_summary": transcript["text"][:500], + "language": transcript["language"], + "segments": segments, + "video_duration": video_duration + } + + +def refine_titles(api_key, context, user_message, conversation_history=None): + """ + Takes video context + user feedback and returns refined title suggestions. + """ + client = genai.Client(api_key=api_key) + + history_text = "" + if conversation_history: + for msg in conversation_history: + role = msg.get("role", "user") + history_text += f"\n{role.upper()}: {msg['content']}" + + prompt = f"""You are a YouTube title expert. Based on the video context and the user's feedback, suggest 8 new refined YouTube titles. + +VIDEO CONTEXT: +{context} + +CONVERSATION HISTORY:{history_text} + +USER'S NEW REQUEST: +{user_message} + +RULES: +- Titles must be under 70 characters +- Incorporate the user's feedback/direction +- Keep titles viral and click-worthy +- If the user asks for a specific style, follow it +- Titles should be in the same language as the original content + +OUTPUT JSON: +{{ + "titles": ["title1", "title2", ...] 
+}}""" + + response = client.models.generate_content( + model="gemini-2.5-flash", + contents=[prompt], + config=types.GenerateContentConfig( + response_mime_type="application/json" + ) + ) + + try: + text = response.text.strip() + if text.startswith("```json"): + text = text[7:] + if text.startswith("```"): + text = text[3:] + if text.endswith("```"): + text = text[:-3] + text = text.strip() + + start_idx = text.find('{') + end_idx = text.rfind('}') + if start_idx != -1 and end_idx != -1: + text = text[start_idx:end_idx + 1] + + return json.loads(text) + except json.JSONDecodeError: + print(f"❌ [Thumbnail] Failed to parse refined titles: {response.text}") + return {"titles": ["Could not refine titles - please try again"]} diff --git a/thumbnail.py b/thumbnail.py index e9cd4d5b..4761e1ff 100644 --- a/thumbnail.py +++ b/thumbnail.py @@ -1,335 +1,16 @@ -import os -import uuid -import time -import json -from google import genai -from google.genai import types -from PIL import Image +"""Compat shim: re-exports openshorts.thumbnails.* at the original import path. +This module was split into three files as part of the restructure: +- openshorts/thumbnails/titles.py (analyze_video_for_titles, refine_titles) +- openshorts/thumbnails/images.py (generate_thumbnail) +- openshorts/thumbnails/descriptions.py (generate_youtube_description) -def analyze_video_for_titles(api_key, video_path, transcript=None): - """ - Transcribes a video and uses Gemini to suggest viral YouTube titles. - If transcript is provided, skips Whisper transcription. - Returns: { "titles": [...], "transcript_summary": "...", "language": "...", "segments": [...], "video_duration": ... 
} - """ - if transcript is None: - from main import transcribe_video - print("🎬 [Thumbnail] Transcribing video...") - transcript = transcribe_video(video_path) - else: - print("🎬 [Thumbnail] Using pre-computed transcript (Whisper already done)...") - - print("📤 [Thumbnail] Uploading video to Gemini...") - client = genai.Client(api_key=api_key) - - file_upload = client.files.upload(file=video_path) - while True: - file_info = client.files.get(name=file_upload.name) - if file_info.state == "ACTIVE": - break - elif file_info.state == "FAILED": - raise Exception("Video processing failed by Gemini.") - time.sleep(2) - - prompt = f"""You are a YouTube title expert who creates viral, click-worthy titles. - -Analyze this video and its transcript, then suggest 10 YouTube titles that would maximize CTR (click-through rate). - -TRANSCRIPT: -{transcript['text']} - -RULES: -- Titles must be under 70 characters -- Use power words, curiosity gaps, and emotional triggers -- Mix styles: how-to, listicle, story-driven, controversial, question-based -- Make them specific to the actual content, not generic -- Include numbers where appropriate -- Consider the language of the video (detected: {transcript['language']}) -- Titles should be in the SAME LANGUAGE as the video transcript - -Also provide a brief summary of the video content (2-3 sentences). - -After generating all 10 titles, pick the TOP 2 you most recommend and explain concisely WHY (CTR potential, emotional hook, uniqueness, etc.). Reference them by their 0-based index in the titles array. 
- -OUTPUT JSON: -{{ - "titles": ["title1", "title2", ...], - "transcript_summary": "Brief summary of the video content...", - "language": "{transcript['language']}", - "recommended": [ - {{"index": 0, "reason": "Why this title is best..."}}, - {{"index": 3, "reason": "Why this title is second best..."}} - ] -}}""" - - print("🤖 [Thumbnail] Asking Gemini for title suggestions...") - response = client.models.generate_content( - model="gemini-2.5-flash", - contents=[file_upload, prompt], - config=types.GenerateContentConfig( - response_mime_type="application/json" - ) - ) - - # Extract segments and duration from transcript for later use - segments = transcript.get("segments", []) - video_duration = segments[-1]["end"] if segments else 0 - - try: - text = response.text.strip() - if text.startswith("```json"): - text = text[7:] - if text.startswith("```"): - text = text[3:] - if text.endswith("```"): - text = text[:-3] - text = text.strip() - - start_idx = text.find('{') - end_idx = text.rfind('}') - if start_idx != -1 and end_idx != -1: - text = text[start_idx:end_idx + 1] - - result = json.loads(text) - result["transcript_summary"] = result.get("transcript_summary", "") - result["language"] = result.get("language", transcript["language"]) - result["segments"] = segments - result["video_duration"] = video_duration - return result - except json.JSONDecodeError: - print(f"❌ [Thumbnail] Failed to parse titles JSON: {response.text}") - return { - "titles": ["Could not generate titles - please try again"], - "transcript_summary": transcript["text"][:500], - "language": transcript["language"], - "segments": segments, - "video_duration": video_duration - } - - -def refine_titles(api_key, context, user_message, conversation_history=None): - """ - Takes video context + user feedback and returns refined title suggestions. 
- """ - client = genai.Client(api_key=api_key) - - history_text = "" - if conversation_history: - for msg in conversation_history: - role = msg.get("role", "user") - history_text += f"\n{role.upper()}: {msg['content']}" - - prompt = f"""You are a YouTube title expert. Based on the video context and the user's feedback, suggest 8 new refined YouTube titles. - -VIDEO CONTEXT: -{context} - -CONVERSATION HISTORY:{history_text} - -USER'S NEW REQUEST: -{user_message} - -RULES: -- Titles must be under 70 characters -- Incorporate the user's feedback/direction -- Keep titles viral and click-worthy -- If the user asks for a specific style, follow it -- Titles should be in the same language as the original content - -OUTPUT JSON: -{{ - "titles": ["title1", "title2", ...] -}}""" - - response = client.models.generate_content( - model="gemini-2.5-flash", - contents=[prompt], - config=types.GenerateContentConfig( - response_mime_type="application/json" - ) - ) - - try: - text = response.text.strip() - if text.startswith("```json"): - text = text[7:] - if text.startswith("```"): - text = text[3:] - if text.endswith("```"): - text = text[:-3] - text = text.strip() - - start_idx = text.find('{') - end_idx = text.rfind('}') - if start_idx != -1 and end_idx != -1: - text = text[start_idx:end_idx + 1] - - return json.loads(text) - except json.JSONDecodeError: - print(f"❌ [Thumbnail] Failed to parse refined titles: {response.text}") - return {"titles": ["Could not refine titles - please try again"]} - - -def generate_thumbnail(api_key, title, session_id, face_image_path=None, bg_image_path=None, extra_prompt="", count=3, video_context=""): - """ - Generates YouTube thumbnails using Gemini image generation. - Returns list of saved image paths (relative URLs). 
- """ - client = genai.Client(api_key=api_key) - - output_dir = os.path.join("output", "thumbnails", session_id) - os.makedirs(output_dir, exist_ok=True) - - prompt_parts = [] - - # Add face image if provided - if face_image_path and os.path.exists(face_image_path): - face_img = Image.open(face_image_path) - prompt_parts.append(face_img) - - # Add background image if provided - if bg_image_path and os.path.exists(bg_image_path): - bg_img = Image.open(bg_image_path) - prompt_parts.append(bg_img) - - # Build video context block - context_block = "" - if video_context: - context_block = f""" -VIDEO CONTEXT (use this to understand the video and design a relevant thumbnail): -{video_context} +New code should import from those modules directly. This shim keeps existing +`from thumbnail import ...` calls working. """ - - # Build extra instructions block (high priority) - extra_block = "" - if extra_prompt: - extra_block = f""" -⚠️ MANDATORY USER INSTRUCTIONS (MUST follow these exactly — they override any default behavior): -{extra_prompt} -""" - - text_prompt = f"""Generate a professional, eye-catching YouTube thumbnail image. 
- -VIDEO TITLE (for reference — do NOT put the full title on the thumbnail): "{title}" -{context_block} -TEXT ON THE THUMBNAIL: -- Based on the title AND the video context, create a SHORT visual hook: 1 to 5 words maximum -- It should capture the core emotion, surprise, or promise of the video -- The thumbnail text should COMPLEMENT the YouTube title (which appears below), not repeat it -- Examples: "$10K EN 30 DÍAS", "ESTO FUNCIONA", "NO LO SABÍAS", "GRATIS 🔥" -- Use ALL CAPS for maximum impact, split into 2-3 lines -{extra_block} -DESIGN REQUIREMENTS: -- The text MUST be large, bold, and high-contrast (readable at small sizes) -- Use vibrant, eye-catching colors that match the video's mood -- Professional YouTube thumbnail aesthetic -- Clean composition — text and face/subject as clear focal points -- NO clutter, NO small text, NO watermarks""" - - if face_image_path and os.path.exists(face_image_path): - text_prompt += "\n- Include the provided face/person prominently with an exaggerated expression (surprise, excitement, shock)" - - if bg_image_path and os.path.exists(bg_image_path): - text_prompt += "\n- Use the provided background image as the base/backdrop" - - prompt_parts.append(text_prompt) - - thumbnails = [] - last_error = None - for i in range(count): - print(f"🎨 [Thumbnail] Generating thumbnail {i + 1}/{count}...") - try: - response = client.models.generate_content( - model="gemini-3.1-flash-image-preview", - contents=prompt_parts, - config=types.GenerateContentConfig( - response_modalities=["TEXT", "IMAGE"], - image_config=types.ImageConfig( - aspect_ratio="16:9", - image_size="2K" - ) - ) - ) - - for part in response.parts: - if part.text is not None: - print(f"📝 [Thumbnail] Gemini text: {part.text}") - elif image := part.as_image(): - filename = f"thumb_{i + 1}.jpg" - filepath = os.path.join(output_dir, filename) - image.save(filepath) - thumbnails.append(f"/thumbnails/{session_id}/{filename}") - print(f"✅ [Thumbnail] Saved: {filepath}") - break - - 
except Exception as e: - last_error = str(e) - print(f"❌ [Thumbnail] Generation {i + 1} failed: {e}") - - if not thumbnails and last_error: - raise RuntimeError(f"All thumbnail generations failed. Last error: {last_error}") - - return thumbnails - - -def generate_youtube_description(api_key, title, transcript_segments, language, video_duration): - """ - Uses Gemini to generate a YouTube description with chapter markers from transcript segments. - Returns: { "description": "full description text with chapters" } - """ - client = genai.Client(api_key=api_key) - - # Format segments for the prompt - formatted_segments = [] - for seg in transcript_segments: - start = seg.get("start", 0) - mins = int(start // 60) - secs = int(start % 60) - timestamp = f"{mins}:{secs:02d}" - formatted_segments.append(f"[{timestamp}] {seg.get('text', '').strip()}") - - segments_text = "\n".join(formatted_segments) - - # Format total duration - dur_mins = int(video_duration // 60) - dur_secs = int(video_duration % 60) - duration_str = f"{dur_mins}:{dur_secs:02d}" - - prompt = f"""You are a YouTube SEO expert. Generate a complete YouTube video description for the following video. - -VIDEO TITLE: "{title}" -VIDEO LANGUAGE: {language} -VIDEO DURATION: {duration_str} - -TRANSCRIPT WITH TIMESTAMPS: -{segments_text} - -REQUIREMENTS: -1. Write the description in the SAME LANGUAGE as the video ({language}) -2. Start with a compelling 2-3 sentence summary/hook -3. Add relevant CTAs (subscribe, like, comment) -4. Generate YouTube CHAPTERS based on the transcript timestamps: - - First chapter MUST start at 0:00 - - Minimum 3 chapters, each at least 10 seconds apart - - Chapter titles should be concise and descriptive - - Format: 0:00 Chapter Title - - Place chapters in their own section with a blank line before and after -5. Add 5-10 relevant hashtags at the end -6. Keep the total description under 5000 characters - -OUTPUT: Return ONLY the description text (no JSON wrapper, no markdown code blocks). 
The description should be ready to paste directly into YouTube.""" - - print("🤖 [Thumbnail] Generating YouTube description with chapters...") - response = client.models.generate_content( - model="gemini-2.5-flash", - contents=[prompt], - ) - - description = response.text.strip() - # Clean up any accidental markdown wrappers - if description.startswith("```"): - lines = description.split("\n") - description = "\n".join(lines[1:-1] if lines[-1].strip() == "```" else lines[1:]) - - return {"description": description} +from openshorts.thumbnails.titles import ( # noqa: F401 + analyze_video_for_titles, + refine_titles, +) +from openshorts.thumbnails.images import generate_thumbnail # noqa: F401 +from openshorts.thumbnails.descriptions import generate_youtube_description # noqa: F401 From 5680eea6d64e905a8b4afe0adf713f68ac16aece Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse Date: Tue, 19 May 2026 15:56:20 -0400 Subject: [PATCH 09/43] chore(restructure): split main.py -> video/* + ml/* + ingest/youtube.py Phase 1 step 7 (biggest split): main.py is broken into eight modules: - openshorts/video/tracking.py SmoothedCameraman, SpeakerTracker - openshorts/video/scene_analysis.py detect_scenes, analyze_scenes_strategy - openshorts/video/reframing.py create_general_frame - openshorts/video/pipeline.py process_video_to_vertical (the hot loop) - openshorts/ml/detection.py detect_face_candidates, detect_person_yolo - openshorts/ml/transcription.py transcribe_video - openshorts/ml/viral_extraction.py GEMINI_PROMPT_TEMPLATE, get_viral_clips - openshorts/ingest/youtube.py download_youtube_video, sanitize_filename main.py becomes a thin shim that re-exports the public surface for backwards compatibility AND preserves the CLI entrypoint (`python main.py -i ... -o ...`) in a private `_cli()` function. Tests stay 62/62 green. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- main.py | 964 ++--------------------------- openshorts/ingest/youtube.py | 143 +++++ openshorts/ml/detection.py | 73 +++ openshorts/ml/transcription.py | 46 ++ openshorts/ml/viral_extraction.py | 137 ++++ openshorts/video/pipeline.py | 195 ++++++ openshorts/video/reframing.py | 42 ++ openshorts/video/scene_analysis.py | 77 +++ openshorts/video/tracking.py | 212 +++++++ 9 files changed, 991 insertions(+), 898 deletions(-) create mode 100644 openshorts/ingest/youtube.py create mode 100644 openshorts/ml/detection.py create mode 100644 openshorts/ml/transcription.py create mode 100644 openshorts/ml/viral_extraction.py create mode 100644 openshorts/video/pipeline.py create mode 100644 openshorts/video/reframing.py create mode 100644 openshorts/video/scene_analysis.py create mode 100644 openshorts/video/tracking.py diff --git a/main.py b/main.py index 0e26f8ce..07b1db9b 100644 --- a/main.py +++ b/main.py @@ -1,909 +1,73 @@ -import time -import cv2 -import scenedetect -import subprocess -import argparse -import re -import sys -from scenedetect import open_video, SceneManager -from scenedetect.detectors import ContentDetector -from ultralytics import YOLO -import torch -import os -import numpy as np -from tqdm import tqdm -import yt_dlp -import mediapipe as mp -# import whisper (replaced by faster_whisper inside function) -from google import genai -from dotenv import load_dotenv -import json - +"""Compat shim + CLI entrypoint. 
+ +The implementation was split across the openshorts package: +- openshorts/video/tracking.py (SmoothedCameraman, SpeakerTracker) +- openshorts/video/scene_analysis.py (detect_scenes, analyze_scenes_strategy, get_video_resolution) +- openshorts/video/reframing.py (create_general_frame) +- openshorts/video/pipeline.py (process_video_to_vertical) +- openshorts/ml/detection.py (detect_face_candidates, detect_person_yolo) +- openshorts/ml/transcription.py (transcribe_video) +- openshorts/ml/viral_extraction.py (GEMINI_PROMPT_TEMPLATE, get_viral_clips) +- openshorts/ingest/youtube.py (download_youtube_video, sanitize_filename) + +New code should import from those modules directly. This shim re-exports the +public surface so existing `from main import ...` calls keep working, and +preserves the CLI entrypoint (`python main.py -i ... -o ...`). +""" import warnings warnings.filterwarnings("ignore", category=UserWarning, module='google.protobuf') -# Load environment variables +from dotenv import load_dotenv load_dotenv() -# --- Constants --- -ASPECT_RATIO = 9 / 16 - -GEMINI_PROMPT_TEMPLATE = """ -You are a senior short-form video editor. Read the ENTIRE transcript and word-level timestamps to choose the 3–15 MOST VIRAL moments for TikTok/IG Reels/YouTube Shorts. Each clip must be between 15 and 60 seconds long. - -⚠️ FFMPEG TIME CONTRACT — STRICT REQUIREMENTS: -- Return timestamps in ABSOLUTE SECONDS from the start of the video (usable in: ffmpeg -ss -to -i ...). -- Only NUMBERS with decimal point, up to 3 decimals (examples: 0, 1.250, 17.350). -- Ensure 0 ≤ start < end ≤ VIDEO_DURATION_SECONDS. -- Each clip between 15 and 60 s (inclusive). -- Prefer starting 0.2–0.4 s BEFORE the hook and ending 0.2–0.4 s AFTER the payoff. -- Use silence moments for natural cuts; never cut in the middle of a word or phrase. -- STRICTLY FORBIDDEN to use time formats other than absolute seconds. 
- -VIDEO_DURATION_SECONDS: {video_duration} - -TRANSCRIPT_TEXT (raw): -{transcript_text} - -WORDS_JSON (array of {{w, s, e}} where s/e are seconds): -{words_json} - -STRICT EXCLUSIONS: -- No generic intros/outros or purely sponsorship segments unless they contain the hook. -- No clips < 15 s or > 60 s. - -OUTPUT — RETURN ONLY VALID JSON (no markdown, no comments). Order clips by predicted performance (best to worst). In the descriptions, ALWAYS include a CTA like "Follow me and comment X and I'll send you the workflow" (especially if discussing an n8n workflow): -{{ - "shorts": [ - {{ - "start": , - "end": , - "video_description_for_tiktok": "", - "video_description_for_instagram": "", - "video_title_for_youtube_short": "", - "viral_hook_text": "<SHORT punchy text overlay (max 10 words). MUST BE IN THE SAME LANGUAGE AS THE VIDEO TRANSCRIPT. Examples: 'POV: You realized...', 'Did you know?', 'Stop doing this!'>" - }} - ] -}} -""" +# Re-exports (used by app.py, thumbnail.py, and existing tests) +from openshorts.video.tracking import ( # noqa: F401 + ASPECT_RATIO, + SmoothedCameraman, + SpeakerTracker, +) +from openshorts.video.scene_analysis import ( # noqa: F401 + detect_scenes, + get_video_resolution, + analyze_scenes_strategy, +) +from openshorts.video.reframing import create_general_frame # noqa: F401 +from openshorts.video.pipeline import process_video_to_vertical # noqa: F401 +from openshorts.ml.detection import detect_face_candidates, detect_person_yolo # noqa: F401 +from openshorts.ml.transcription import transcribe_video # noqa: F401 +from openshorts.ml.viral_extraction import GEMINI_PROMPT_TEMPLATE, get_viral_clips # noqa: F401 +from openshorts.ingest.youtube import download_youtube_video, sanitize_filename # noqa: F401 + + +def _cli(): + import argparse + import json + import os + import subprocess + import time + + import cv2 -# Load the YOLO model once (Keep for backup or scene analysis if needed) -model = YOLO('yolov8n.pt') - -# --- MediaPipe Setup --- 
-# Use standard Face Detection (BlazeFace) for speed -mp_face_detection = mp.solutions.face_detection -face_detection = mp_face_detection.FaceDetection(model_selection=1, min_detection_confidence=0.5) - -class SmoothedCameraman: - """ - Handles smooth camera movement. - Simplified Logic: "Heavy Tripod" - Only moves if the subject leaves the center safe zone. - Moves slowly and linearly. - """ - def __init__(self, output_width, output_height, video_width, video_height): - self.output_width = output_width - self.output_height = output_height - self.video_width = video_width - self.video_height = video_height - - # Initial State - self.current_center_x = video_width / 2 - self.target_center_x = video_width / 2 - - # Calculate crop dimensions once - self.crop_height = video_height - self.crop_width = int(self.crop_height * ASPECT_RATIO) - if self.crop_width > video_width: - self.crop_width = video_width - self.crop_height = int(self.crop_width / ASPECT_RATIO) - - # Safe Zone: 20% of the video width - # As long as the target is within this zone relative to current center, DO NOT MOVE. - self.safe_zone_radius = self.crop_width * 0.25 - - def update_target(self, face_box): - """ - Updates the target center based on detected face/person. - """ - if face_box: - x, y, w, h = face_box - self.target_center_x = x + w / 2 - - def get_crop_box(self, force_snap=False): - """ - Returns the (x1, y1, x2, y2) for the current frame. - """ - if force_snap: - self.current_center_x = self.target_center_x - else: - diff = self.target_center_x - self.current_center_x - - # SIMPLIFIED LOGIC: - # 1. Is the target outside the safe zone? - if abs(diff) > self.safe_zone_radius: - # 2. 
If yes, move towards it slowly (Linear Speed) - # Determine direction - direction = 1 if diff > 0 else -1 - - # Speed: 2 pixels per frame (Slow pan) - # If the distance is HUGE (scene change or fast movement), speed up slightly - if abs(diff) > self.crop_width * 0.5: - speed = 15.0 # Fast re-frame - else: - speed = 3.0 # Slow, steady pan - - self.current_center_x += direction * speed - - # Check if we overshot (prevent oscillation) - new_diff = self.target_center_x - self.current_center_x - if (direction == 1 and new_diff < 0) or (direction == -1 and new_diff > 0): - self.current_center_x = self.target_center_x - - # If inside safe zone, DO NOTHING (Stationary Camera) - - # Clamp center - half_crop = self.crop_width / 2 - - if self.current_center_x - half_crop < 0: - self.current_center_x = half_crop - if self.current_center_x + half_crop > self.video_width: - self.current_center_x = self.video_width - half_crop - - x1 = int(self.current_center_x - half_crop) - x2 = int(self.current_center_x + half_crop) - - x1 = max(0, x1) - x2 = min(self.video_width, x2) - - y1 = 0 - y2 = self.video_height - - return x1, y1, x2, y2 - -class SpeakerTracker: - """ - Tracks speakers over time to prevent rapid switching and handle temporary obstructions. - """ - def __init__(self, stabilization_frames=15, cooldown_frames=30): - self.active_speaker_id = None - self.speaker_scores = {} # {id: score} - self.last_seen = {} # {id: frame_number} - self.locked_counter = 0 # How long we've been locked on current speaker - - # Hyperparameters - self.stabilization_threshold = stabilization_frames # Frames needed to confirm a new speaker - self.switch_cooldown = cooldown_frames # Minimum frames before switching again - self.last_switch_frame = -1000 - - # ID tracking - self.next_id = 0 - self.known_faces = [] # [{'id': 0, 'center': x, 'last_frame': 123}] - - def get_target(self, face_candidates, frame_number, width): - """ - Decides which face to focus on. 
- face_candidates: list of {'box': [x,y,w,h], 'score': float} - """ - current_candidates = [] - - # 1. Match faces to known IDs (simple distance tracking) - for face in face_candidates: - x, y, w, h = face['box'] - center_x = x + w / 2 - - best_match_id = -1 - min_dist = width * 0.15 # Reduced matching radius to avoid jumping in groups - - # Try to match with known faces seen recently - for kf in self.known_faces: - if frame_number - kf['last_frame'] > 30: # Forgot faces older than 1s (was 2s) - continue - - dist = abs(center_x - kf['center']) - if dist < min_dist: - min_dist = dist - best_match_id = kf['id'] - - # If no match, assign new ID - if best_match_id == -1: - best_match_id = self.next_id - self.next_id += 1 - - # Update known face - self.known_faces = [kf for kf in self.known_faces if kf['id'] != best_match_id] - self.known_faces.append({'id': best_match_id, 'center': center_x, 'last_frame': frame_number}) - - current_candidates.append({ - 'id': best_match_id, - 'box': face['box'], - 'score': face['score'] - }) - - # 2. Update Scores with decay - for pid in list(self.speaker_scores.keys()): - self.speaker_scores[pid] *= 0.85 # Faster decay (was 0.9) - if self.speaker_scores[pid] < 0.1: - del self.speaker_scores[pid] - - # Add new scores - for cand in current_candidates: - pid = cand['id'] - # Score is purely based on size (proximity) now that we don't have mouth - raw_score = cand['score'] / (width * width * 0.05) - self.speaker_scores[pid] = self.speaker_scores.get(pid, 0) + raw_score - - # 3. 
Determine Best Speaker - if not current_candidates: - # If no one found, maintain last active speaker if cooldown allows - # to avoid black screen or jump to 0,0 - return None - - best_candidate = None - max_score = -1 - - for cand in current_candidates: - pid = cand['id'] - total_score = self.speaker_scores.get(pid, 0) - - # Hysteresis: HUGE Bonus for current active speaker - if pid == self.active_speaker_id: - total_score *= 3.0 # Sticky factor - - if total_score > max_score: - max_score = total_score - best_candidate = cand - - # 4. Decide Switch - if best_candidate: - target_id = best_candidate['id'] - - if target_id == self.active_speaker_id: - self.locked_counter += 1 - return best_candidate['box'] - - # New person - if frame_number - self.last_switch_frame < self.switch_cooldown: - old_cand = next((c for c in current_candidates if c['id'] == self.active_speaker_id), None) - if old_cand: - return old_cand['box'] - - self.active_speaker_id = target_id - self.last_switch_frame = frame_number - self.locked_counter = 0 - return best_candidate['box'] - - return None - -def detect_face_candidates(frame): - """ - Returns list of all detected faces using lightweight FaceDetection. - """ - height, width, _ = frame.shape - rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) - results = face_detection.process(rgb_frame) - - candidates = [] - - if not results.detections: - return [] - - for detection in results.detections: - bboxC = detection.location_data.relative_bounding_box - x = int(bboxC.xmin * width) - y = int(bboxC.ymin * height) - w = int(bboxC.width * width) - h = int(bboxC.height * height) - - candidates.append({ - 'box': [x, y, w, h], - 'score': w * h # Area as score - }) - - return candidates - -def detect_person_yolo(frame): - """ - Fallback: Detect largest person using YOLO when face detection fails. - Returns [x, y, w, h] of the person's 'upper body' approximation. 
- """ - # Use the globally loaded model - results = model(frame, verbose=False, classes=[0]) # class 0 is person - - if not results: - return None - - best_box = None - max_area = 0 - - for result in results: - boxes = result.boxes - for box in boxes: - x1, y1, x2, y2 = [int(i) for i in box.xyxy[0]] - w = x2 - x1 - h = y2 - y1 - area = w * h - - if area > max_area: - max_area = area - # Focus on the top 40% of the person (head/chest) for framing - # This approximates where the face is if we can't detect it directly - face_h = int(h * 0.4) - best_box = [x1, y1, w, face_h] - - return best_box - -def create_general_frame(frame, output_width, output_height): - """ - Creates a 'General Shot' frame: - - Background: Blurred zoom of original - - Foreground: Original video scaled to fit width, centered vertically. - """ - orig_h, orig_w = frame.shape[:2] - - # 1. Background (Fill Height) - # Crop center to aspect ratio - bg_scale = output_height / orig_h - bg_w = int(orig_w * bg_scale) - bg_resized = cv2.resize(frame, (bg_w, output_height)) - - # Crop center of background - start_x = (bg_w - output_width) // 2 - if start_x < 0: start_x = 0 - background = bg_resized[:, start_x:start_x+output_width] - if background.shape[1] != output_width: - background = cv2.resize(background, (output_width, output_height)) - - # Blur background - background = cv2.GaussianBlur(background, (51, 51), 0) - - # 2. Foreground (Fit Width) - scale = output_width / orig_w - fg_h = int(orig_h * scale) - foreground = cv2.resize(frame, (output_width, fg_h)) - - # 3. Overlay - y_offset = (output_height - fg_h) // 2 - - # Clone background to avoid modifying it - final_frame = background.copy() - final_frame[y_offset:y_offset+fg_h, :] = foreground - - return final_frame - -def analyze_scenes_strategy(video_path, scenes): - """ - Analyzes each scene to determine if it should be TRACK (Single person) or GENERAL (Group/Wide). - Returns list of strategies corresponding to scenes. 
- """ - cap = cv2.VideoCapture(video_path) - strategies = [] - - if not cap.isOpened(): - return ['TRACK'] * len(scenes) - - for start, end in tqdm(scenes, desc=" Analyzing Scenes"): - # Sample 3 frames (start, middle, end) - frames_to_check = [ - start.get_frames() + 5, - int((start.get_frames() + end.get_frames()) / 2), - end.get_frames() - 5 - ] - - face_counts = [] - for f_idx in frames_to_check: - cap.set(cv2.CAP_PROP_POS_FRAMES, f_idx) - ret, frame = cap.read() - if not ret: continue - - # Detect faces - candidates = detect_face_candidates(frame) - face_counts.append(len(candidates)) - - # Decision Logic - if not face_counts: - avg_faces = 0 - else: - avg_faces = sum(face_counts) / len(face_counts) - - # Strategy: - # 0 faces -> GENERAL (Landscape/B-roll) - # 1 face -> TRACK - # > 1.2 faces -> GENERAL (Group) - - if avg_faces > 1.2 or avg_faces < 0.5: - strategies.append('GENERAL') - else: - strategies.append('TRACK') - - cap.release() - return strategies - -def detect_scenes(video_path): - video = open_video(video_path) - scene_manager = SceneManager() - scene_manager.add_detector(ContentDetector()) - scene_manager.detect_scenes(video=video) - scene_list = scene_manager.get_scene_list() - fps = video.frame_rate - return scene_list, fps - -def get_video_resolution(video_path): - cap = cv2.VideoCapture(video_path) - if not cap.isOpened(): - raise IOError(f"Could not open video file {video_path}") - width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - cap.release() - return width, height - - -def sanitize_filename(filename): - """Remove invalid characters from filename.""" - filename = re.sub(r'[<>:"/\\|?*#]', '', filename) - filename = filename.replace(' ', '_') - return filename[:100] - - -def download_youtube_video(url, output_dir="."): - """ - Downloads a YouTube video using yt-dlp. - Returns the path to the downloaded video and the video title. 
- """ - print(f"🔍 Debug: yt-dlp version: {yt_dlp.version.__version__}") - print("📥 Downloading video from YouTube...") - step_start_time = time.time() - - cookies_path = '/app/cookies.txt' - cookies_env = os.environ.get("YOUTUBE_COOKIES") - if cookies_env: - print("🍪 Found YOUTUBE_COOKIES env var, creating cookies file inside container...") - try: - with open(cookies_path, 'w') as f: - f.write(cookies_env) - if os.path.exists(cookies_path): - print(f" Debug: Cookies file created. Size: {os.path.getsize(cookies_path)} bytes") - with open(cookies_path, 'r') as f: - content = f.read(100) - print(f" Debug: First 100 chars of cookie file: {content}") - except Exception as e: - print(f"⚠️ Failed to write cookies file: {e}") - cookies_path = None - else: - cookies_path = None - print("⚠️ YOUTUBE_COOKIES env var not found.") - - # Common yt-dlp options to work around YouTube bot detection. - # extractor_args tries multiple player clients in order; tv_embed / android - # avoid the OAuth/PO-token checks that block server IPs. 
- _COMMON_YDL_OPTS = { - 'quiet': False, - 'verbose': True, - 'no_warnings': False, - 'cookiefile': cookies_path if cookies_path else None, - 'socket_timeout': 30, - 'retries': 10, - 'fragment_retries': 10, - 'nocheckcertificate': True, - 'cachedir': False, - 'extractor_args': { - 'youtube': { - 'player_client': ['tv_embed', 'android', 'mweb', 'web'], - 'player_skip': ['webpage', 'configs'], - } - }, - 'http_headers': { - 'User-Agent': ( - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ' - 'AppleWebKit/537.36 (KHTML, like Gecko) ' - 'Chrome/120.0.0.0 Safari/537.36' - ), - }, - } - - with yt_dlp.YoutubeDL(_COMMON_YDL_OPTS) as ydl: - try: - info = ydl.extract_info(url, download=False) - video_title = info.get('title', 'youtube_video') - sanitized_title = sanitize_filename(video_title) - except Exception as e: - # Force print to stderr/stdout immediately so it's captured before crash - import sys - import traceback - - # Print minimal error first to ensure something gets out - print("🚨 YOUTUBE DOWNLOAD ERROR 🚨", file=sys.stderr) - - error_msg = f""" - -❌ ================================================================= ❌ -❌ FATAL ERROR: YOUTUBE DOWNLOAD FAILED -❌ ================================================================= ❌ - -REASON: YouTube has blocked the download request (Error 429/Unavailable). - This is likely a temporary IP ban on this server. - -👇 SOLUTION FOR USER 👇 ---------------------------------------------------------------------- -1. Download the video manually to your computer. -2. Use the 'Upload Video' tab in this app to process it. 
---------------------------------------------------------------------- - -Technical Details: {str(e)} - """ - # Print to both streams to ensure capture - print(error_msg, file=sys.stdout) - print(error_msg, file=sys.stderr) - - # Force flush - sys.stdout.flush() - sys.stderr.flush() - - # Wait a split second to allow buffer to drain before raising - time.sleep(0.5) - - raise e - - output_template = os.path.join(output_dir, f'{sanitized_title}.%(ext)s') - expected_file = os.path.join(output_dir, f'{sanitized_title}.mp4') - if os.path.exists(expected_file): - os.remove(expected_file) - print(f"🗑️ Removed existing file to re-download with H.264 codec") - - ydl_opts = { - **_COMMON_YDL_OPTS, - 'format': 'bestvideo[vcodec^=avc1][ext=mp4]+bestaudio[ext=m4a]/bestvideo[vcodec^=avc1]+bestaudio/best[ext=mp4]/best', - 'outtmpl': output_template, - 'merge_output_format': 'mp4', - 'overwrites': True, - } - - with yt_dlp.YoutubeDL(ydl_opts) as ydl: - ydl.download([url]) - - downloaded_file = os.path.join(output_dir, f'{sanitized_title}.mp4') - - if not os.path.exists(downloaded_file): - for f in os.listdir(output_dir): - if f.startswith(sanitized_title) and f.endswith('.mp4'): - downloaded_file = os.path.join(output_dir, f) - break - - step_end_time = time.time() - print(f"✅ Video downloaded in {step_end_time - step_start_time:.2f}s: {downloaded_file}") - - return downloaded_file, sanitized_title - -def process_video_to_vertical(input_video, final_output_video): - """ - Core logic to convert horizontal video to vertical using scene detection and Active Speaker Tracking (MediaPipe). 
- """ - script_start_time = time.time() - - # Define temporary file paths based on the output name - base_name = os.path.splitext(final_output_video)[0] - temp_video_output = f"{base_name}_temp_video.mp4" - temp_audio_output = f"{base_name}_temp_audio.aac" - - # Clean up previous temp files if they exist - if os.path.exists(temp_video_output): os.remove(temp_video_output) - if os.path.exists(temp_audio_output): os.remove(temp_audio_output) - if os.path.exists(final_output_video): os.remove(final_output_video) - - print(f"🎬 Processing clip: {input_video}") - print(" Step 1: Detecting scenes...") - scenes, fps = detect_scenes(input_video) - - if not scenes: - print(" ❌ No scenes were detected. Using full video as one scene.") - # If scene detection fails or finds nothing, treat whole video as one scene - cap = cv2.VideoCapture(input_video) - total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - cap.release() - from scenedetect import FrameTimecode - scenes = [(FrameTimecode(0, fps), FrameTimecode(total_frames, fps))] - - print(f" ✅ Found {len(scenes)} scenes.") - - print("\n 🧠 Step 2: Preparing Active Tracking...") - original_width, original_height = get_video_resolution(input_video) - - OUTPUT_HEIGHT = original_height - OUTPUT_WIDTH = int(OUTPUT_HEIGHT * ASPECT_RATIO) - if OUTPUT_WIDTH % 2 != 0: - OUTPUT_WIDTH += 1 - - # Initialize Cameraman - cameraman = SmoothedCameraman(OUTPUT_WIDTH, OUTPUT_HEIGHT, original_width, original_height) - - # --- New Strategy: Per-Scene Analysis --- - print("\n 🤖 Step 3: Analyzing Scenes for Strategy (Single vs Group)...") - scene_strategies = analyze_scenes_strategy(input_video, scenes) - # scene_strategies is a list of 'TRACK' or 'General' corresponding to scenes - - print("\n ✂️ Step 4: Processing video frames...") - - command = [ - 'ffmpeg', '-y', '-f', 'rawvideo', '-vcodec', 'rawvideo', - '-s', f'{OUTPUT_WIDTH}x{OUTPUT_HEIGHT}', '-pix_fmt', 'bgr24', - '-r', str(fps), '-i', '-', '-c:v', 'libx264', - '-preset', 'fast', '-crf', 
'23', '-an', temp_video_output - ] - - ffmpeg_process = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) - - cap = cv2.VideoCapture(input_video) - total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - - frame_number = 0 - current_scene_index = 0 - - # Pre-calculate scene boundaries - scene_boundaries = [] - for s_start, s_end in scenes: - scene_boundaries.append((s_start.get_frames(), s_end.get_frames())) - - # Global tracker for single-person shots - speaker_tracker = SpeakerTracker(cooldown_frames=30) - - with tqdm(total=total_frames, desc=" Processing", file=sys.stdout) as pbar: - while cap.isOpened(): - ret, frame = cap.read() - if not ret: - break - - # Update Scene Index - if current_scene_index < len(scene_boundaries): - start_f, end_f = scene_boundaries[current_scene_index] - if frame_number >= end_f and current_scene_index < len(scene_boundaries) - 1: - current_scene_index += 1 - - # Determine Strategy for current frame based on scene - current_strategy = scene_strategies[current_scene_index] if current_scene_index < len(scene_strategies) else 'TRACK' - - # Apply Strategy - if current_strategy == 'GENERAL': - # "Plano General" -> Blur Background + Fit Width - output_frame = create_general_frame(frame, OUTPUT_WIDTH, OUTPUT_HEIGHT) - - # Reset cameraman/tracker so they don't drift while inactive - cameraman.current_center_x = original_width / 2 - cameraman.target_center_x = original_width / 2 - - else: - # "Single Speaker" -> Track & Crop - - # Detect every 2nd frame for performance - if frame_number % 2 == 0: - candidates = detect_face_candidates(frame) - target_box = speaker_tracker.get_target(candidates, frame_number, original_width) - if target_box: - cameraman.update_target(target_box) - else: - person_box = detect_person_yolo(frame) - if person_box: - cameraman.update_target(person_box) - - # Snap camera on scene change to avoid panning from previous scene position - is_scene_start = (frame_number == 
scene_boundaries[current_scene_index][0]) - - x1, y1, x2, y2 = cameraman.get_crop_box(force_snap=is_scene_start) - - # Crop - if y2 > y1 and x2 > x1: - cropped = frame[y1:y2, x1:x2] - output_frame = cv2.resize(cropped, (OUTPUT_WIDTH, OUTPUT_HEIGHT)) - else: - output_frame = cv2.resize(frame, (OUTPUT_WIDTH, OUTPUT_HEIGHT)) - - ffmpeg_process.stdin.write(output_frame.tobytes()) - frame_number += 1 - pbar.update(1) - - ffmpeg_process.stdin.close() - stderr_output = ffmpeg_process.stderr.read().decode() - ffmpeg_process.wait() - cap.release() - - if ffmpeg_process.returncode != 0: - print("\n ❌ FFmpeg frame processing failed.") - print(" Stderr:", stderr_output) - return False - - print("\n 🔊 Step 5: Extracting audio...") - audio_extract_command = [ - 'ffmpeg', '-y', '-i', input_video, '-vn', '-acodec', 'copy', temp_audio_output - ] - try: - subprocess.run(audio_extract_command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) - except subprocess.CalledProcessError: - print("\n ❌ Audio extraction failed (maybe no audio?). 
Proceeding without audio.") - pass - - print("\n ✨ Step 6: Merging...") - if os.path.exists(temp_audio_output): - merge_command = [ - 'ffmpeg', '-y', '-i', temp_video_output, '-i', temp_audio_output, - '-c:v', 'copy', '-c:a', 'copy', final_output_video - ] - else: - merge_command = [ - 'ffmpeg', '-y', '-i', temp_video_output, - '-c:v', 'copy', final_output_video - ] - - try: - subprocess.run(merge_command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) - print(f" ✅ Clip saved to {final_output_video}") - except subprocess.CalledProcessError as e: - print("\n ❌ Final merge failed.") - print(" Stderr:", e.stderr.decode()) - return False - - # Clean up temp files - if os.path.exists(temp_video_output): os.remove(temp_video_output) - if os.path.exists(temp_audio_output): os.remove(temp_audio_output) - - return True - -def transcribe_video(video_path): - print("🎙️ Transcribing video with Faster-Whisper (CPU Optimized)...") - from faster_whisper import WhisperModel - - # Run on CPU with INT8 quantization for speed - model = WhisperModel("base", device="cpu", compute_type="int8") - - segments, info = model.transcribe(video_path, word_timestamps=True) - - print(f" Detected language '{info.language}' with probability {info.language_probability:.2f}") - - # Convert to openai-whisper compatible format - transcript_segments = [] - full_text = "" - - for segment in segments: - # Print progress to keep user informed (and prevent timeouts feeling) - print(f" [{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}") - - seg_dict = { - 'text': segment.text, - 'start': segment.start, - 'end': segment.end, - 'words': [] - } - - if segment.words: - for word in segment.words: - seg_dict['words'].append({ - 'word': word.word, - 'start': word.start, - 'end': word.end, - 'probability': word.probability - }) - - transcript_segments.append(seg_dict) - full_text += segment.text + " " - - return { - 'text': full_text.strip(), - 'segments': transcript_segments, - 'language': 
info.language - } - -def get_viral_clips(transcript_result, video_duration): - print("🤖 Analyzing with Gemini...") - - api_key = os.getenv("GEMINI_API_KEY") - if not api_key: - print("❌ Error: GEMINI_API_KEY not found in environment variables.") - return None - - - client = genai.Client(api_key=api_key) - - # We use gemini-2.5-flash as requested. - model_name = 'gemini-2.5-flash' - - print(f"🤖 Initializing Gemini with model: {model_name}") - - # Extract words - words = [] - for segment in transcript_result['segments']: - for word in segment.get('words', []): - words.append({ - 'w': word['word'], - 's': word['start'], - 'e': word['end'] - }) - - prompt = GEMINI_PROMPT_TEMPLATE.format( - video_duration=video_duration, - transcript_text=json.dumps(transcript_result['text']), - words_json=json.dumps(words) - ) - - try: - response = client.models.generate_content( - model=model_name, - contents=prompt - ) - - # --- Cost Calculation --- - try: - usage = response.usage_metadata - if usage: - # Gemini 2.5 Flash Pricing (Dec 2025) - # Input: $0.10 per 1M tokens - # Output: $0.40 per 1M tokens - - input_price_per_million = 0.10 - output_price_per_million = 0.40 - - prompt_tokens = usage.prompt_token_count - output_tokens = usage.candidates_token_count - - input_cost = (prompt_tokens / 1_000_000) * input_price_per_million - output_cost = (output_tokens / 1_000_000) * output_price_per_million - total_cost = input_cost + output_cost - - cost_analysis = { - "input_tokens": prompt_tokens, - "output_tokens": output_tokens, - "input_cost": input_cost, - "output_cost": output_cost, - "total_cost": total_cost, - "model": model_name - } - - print(f"💰 Token Usage ({model_name}):") - print(f" - Input Tokens: {prompt_tokens} (${input_cost:.6f})") - print(f" - Output Tokens: {output_tokens} (${output_cost:.6f})") - print(f" - Total Estimated Cost: ${total_cost:.6f}") - - except Exception as e: - print(f"⚠️ Could not calculate cost: {e}") - cost_analysis = None - # ------------------------ 
- - # Clean response if it contains markdown code blocks - text = response.text - if text.startswith("```json"): - text = text[7:] - if text.endswith("```"): - text = text[:-3] - text = text.strip() - - result_json = json.loads(text) - if cost_analysis: - result_json['cost_analysis'] = cost_analysis - - return result_json - except Exception as e: - print(f"❌ Gemini Error: {e}") - return None - -if __name__ == '__main__': parser = argparse.ArgumentParser(description="AutoCrop-Vertical with Viral Clip Detection.") - + input_group = parser.add_mutually_exclusive_group(required=True) input_group.add_argument('-i', '--input', type=str, help="Path to the input video file.") input_group.add_argument('-u', '--url', type=str, help="YouTube URL to download and process.") - + parser.add_argument('-o', '--output', type=str, help="Output directory or file (if processing whole video).") parser.add_argument('--keep-original', action='store_true', help="Keep the downloaded YouTube video.") parser.add_argument('--skip-analysis', action='store_true', help="Skip AI analysis and convert the whole video.") - + args = parser.parse_args() script_start_time = time.time() - + def _ensure_dir(path: str) -> str: """Create directory if missing and return the same path.""" if path: os.makedirs(path, exist_ok=True) return path - + # 1. Get Input Video if args.url: # For multi-clip runs, treat --output as an OUTPUT DIRECTORY (create it if needed). @@ -918,12 +82,12 @@ def _ensure_dir(path: str) -> str: output_dir = os.path.dirname(args.output) or "." else: output_dir = "." - + input_video, video_title = download_youtube_video(args.url, output_dir) else: input_video = args.input video_title = os.path.splitext(os.path.basename(input_video))[0] - + if args.output and not args.skip_analysis: # For multi-clip runs, treat --output as an OUTPUT DIRECTORY (create it if needed). output_dir = _ensure_dir(args.output) @@ -948,7 +112,7 @@ def _ensure_dir(path: str) -> str: else: # 3. 
Transcribe transcript = transcribe_video(input_video) - + # Get duration cap = cv2.VideoCapture(input_video) fps = cap.get(cv2.CAP_PROP_FPS) @@ -958,14 +122,14 @@ def _ensure_dir(path: str) -> str: # 4. Gemini Analysis clips_data = get_viral_clips(transcript, duration) - + if not clips_data or 'shorts' not in clips_data: print("❌ Failed to identify clips. Converting whole video as fallback.") output_file = os.path.join(output_dir, f"{video_title}_vertical.mp4") process_video_to_vertical(input_video, output_file) else: print(f"🔥 Found {len(clips_data['shorts'])} viral clips!") - + # Save metadata clips_data['transcript'] = transcript # Save full transcript for subtitles metadata_file = os.path.join(output_dir, f"{video_title}_metadata.json") @@ -979,31 +143,31 @@ def _ensure_dir(path: str) -> str: end = clip['end'] print(f"\n🎬 Processing Clip {i+1}: {start}s - {end}s") print(f" Title: {clip.get('video_title_for_youtube_short', 'No Title')}") - + # Cut clip clip_filename = f"{video_title}_clip_{i+1}.mp4" clip_temp_path = os.path.join(output_dir, f"temp_{clip_filename}") clip_final_path = os.path.join(output_dir, clip_filename) - + # ffmpeg cut # Using re-encoding for precision as requested by strict seconds cut_command = [ - 'ffmpeg', '-y', - '-ss', str(start), - '-to', str(end), + 'ffmpeg', '-y', + '-ss', str(start), + '-to', str(end), '-i', input_video, '-c:v', 'libx264', '-crf', '18', '-preset', 'fast', '-c:a', 'aac', clip_temp_path ] subprocess.run(cut_command, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) - + # Process vertical success = process_video_to_vertical(clip_temp_path, clip_final_path) - + if success: print(f" ✅ Clip {i+1} ready: {clip_final_path}") - + # Clean up temp cut if os.path.exists(clip_temp_path): os.remove(clip_temp_path) @@ -1015,3 +179,7 @@ def _ensure_dir(path: str) -> str: total_time = time.time() - script_start_time print(f"\n⏱️ Total execution time: {total_time:.2f}s") + + +if __name__ == '__main__': + _cli() diff --git 
a/openshorts/ingest/youtube.py b/openshorts/ingest/youtube.py new file mode 100644 index 00000000..a80ebf46 --- /dev/null +++ b/openshorts/ingest/youtube.py @@ -0,0 +1,143 @@ +"""YouTube downloader with bot-detection workarounds (yt-dlp + cookies + alt clients).""" + +import os +import re +import sys +import time +import yt_dlp + + +def sanitize_filename(filename): + """Remove invalid characters from filename.""" + filename = re.sub(r'[<>:"/\\|?*#]', '', filename) + filename = filename.replace(' ', '_') + return filename[:100] + + +def download_youtube_video(url, output_dir="."): + """ + Downloads a YouTube video using yt-dlp. + Returns the path to the downloaded video and the video title. + """ + print(f"🔍 Debug: yt-dlp version: {yt_dlp.version.__version__}") + print("📥 Downloading video from YouTube...") + step_start_time = time.time() + + cookies_path = '/app/cookies.txt' + cookies_env = os.environ.get("YOUTUBE_COOKIES") + if cookies_env: + print("🍪 Found YOUTUBE_COOKIES env var, creating cookies file inside container...") + try: + with open(cookies_path, 'w') as f: + f.write(cookies_env) + if os.path.exists(cookies_path): + print(f" Debug: Cookies file created. Size: {os.path.getsize(cookies_path)} bytes") + with open(cookies_path, 'r') as f: + content = f.read(100) + print(f" Debug: First 100 chars of cookie file: {content}") + except Exception as e: + print(f"⚠️ Failed to write cookies file: {e}") + cookies_path = None + else: + cookies_path = None + print("⚠️ YOUTUBE_COOKIES env var not found.") + + # Common yt-dlp options to work around YouTube bot detection. + # extractor_args tries multiple player clients in order; tv_embed / android + # avoid the OAuth/PO-token checks that block server IPs. 
+ _COMMON_YDL_OPTS = { + 'quiet': False, + 'verbose': True, + 'no_warnings': False, + 'cookiefile': cookies_path if cookies_path else None, + 'socket_timeout': 30, + 'retries': 10, + 'fragment_retries': 10, + 'nocheckcertificate': True, + 'cachedir': False, + 'extractor_args': { + 'youtube': { + 'player_client': ['tv_embed', 'android', 'mweb', 'web'], + 'player_skip': ['webpage', 'configs'], + } + }, + 'http_headers': { + 'User-Agent': ( + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ' + 'AppleWebKit/537.36 (KHTML, like Gecko) ' + 'Chrome/120.0.0.0 Safari/537.36' + ), + }, + } + + with yt_dlp.YoutubeDL(_COMMON_YDL_OPTS) as ydl: + try: + info = ydl.extract_info(url, download=False) + video_title = info.get('title', 'youtube_video') + sanitized_title = sanitize_filename(video_title) + except Exception as e: + # Force print to stderr/stdout immediately so it's captured before crash + import traceback + + # Print minimal error first to ensure something gets out + print("🚨 YOUTUBE DOWNLOAD ERROR 🚨", file=sys.stderr) + + error_msg = f""" + +❌ ================================================================= ❌ +❌ FATAL ERROR: YOUTUBE DOWNLOAD FAILED +❌ ================================================================= ❌ + +REASON: YouTube has blocked the download request (Error 429/Unavailable). + This is likely a temporary IP ban on this server. + +👇 SOLUTION FOR USER 👇 +--------------------------------------------------------------------- +1. Download the video manually to your computer. +2. Use the 'Upload Video' tab in this app to process it. 
+--------------------------------------------------------------------- + +Technical Details: {str(e)} + """ + # Print to both streams to ensure capture + print(error_msg, file=sys.stdout) + print(error_msg, file=sys.stderr) + + # Force flush + sys.stdout.flush() + sys.stderr.flush() + + # Wait a split second to allow buffer to drain before raising + time.sleep(0.5) + + raise e + + output_template = os.path.join(output_dir, f'{sanitized_title}.%(ext)s') + expected_file = os.path.join(output_dir, f'{sanitized_title}.mp4') + if os.path.exists(expected_file): + os.remove(expected_file) + print(f"🗑️ Removed existing file to re-download with H.264 codec") + + ydl_opts = { + **_COMMON_YDL_OPTS, + 'format': 'bestvideo[vcodec^=avc1][ext=mp4]+bestaudio[ext=m4a]/bestvideo[vcodec^=avc1]+bestaudio/best[ext=mp4]/best', + 'outtmpl': output_template, + 'merge_output_format': 'mp4', + 'overwrites': True, + } + + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + ydl.download([url]) + + downloaded_file = os.path.join(output_dir, f'{sanitized_title}.mp4') + + if not os.path.exists(downloaded_file): + for f in os.listdir(output_dir): + if f.startswith(sanitized_title) and f.endswith('.mp4'): + downloaded_file = os.path.join(output_dir, f) + break + + step_end_time = time.time() + print(f"✅ Video downloaded in {step_end_time - step_start_time:.2f}s: {downloaded_file}") + + return downloaded_file, sanitized_title diff --git a/openshorts/ml/detection.py b/openshorts/ml/detection.py new file mode 100644 index 00000000..f48900fd --- /dev/null +++ b/openshorts/ml/detection.py @@ -0,0 +1,73 @@ +"""Face and person detection: MediaPipe BlazeFace (primary) + YOLOv8 (fallback).""" + +import cv2 +from ultralytics import YOLO +import mediapipe as mp + +# Load the YOLO model once (Keep for backup or scene analysis if needed) +model = YOLO('yolov8n.pt') + +# --- MediaPipe Setup --- +# Use standard Face Detection (BlazeFace) for speed +mp_face_detection = mp.solutions.face_detection +face_detection = 
mp_face_detection.FaceDetection(model_selection=1, min_detection_confidence=0.5) + + +def detect_face_candidates(frame): + """ + Returns list of all detected faces using lightweight FaceDetection. + """ + height, width, _ = frame.shape + rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + results = face_detection.process(rgb_frame) + + candidates = [] + + if not results.detections: + return [] + + for detection in results.detections: + bboxC = detection.location_data.relative_bounding_box + x = int(bboxC.xmin * width) + y = int(bboxC.ymin * height) + w = int(bboxC.width * width) + h = int(bboxC.height * height) + + candidates.append({ + 'box': [x, y, w, h], + 'score': w * h # Area as score + }) + + return candidates + + +def detect_person_yolo(frame): + """ + Fallback: Detect largest person using YOLO when face detection fails. + Returns [x, y, w, h] of the person's 'upper body' approximation. + """ + # Use the globally loaded model + results = model(frame, verbose=False, classes=[0]) # class 0 is person + + if not results: + return None + + best_box = None + max_area = 0 + + for result in results: + boxes = result.boxes + for box in boxes: + x1, y1, x2, y2 = [int(i) for i in box.xyxy[0]] + w = x2 - x1 + h = y2 - y1 + area = w * h + + if area > max_area: + max_area = area + # Focus on the top 40% of the person (head/chest) for framing + # This approximates where the face is if we can't detect it directly + face_h = int(h * 0.4) + best_box = [x1, y1, w, face_h] + + return best_box diff --git a/openshorts/ml/transcription.py b/openshorts/ml/transcription.py new file mode 100644 index 00000000..d3c722c6 --- /dev/null +++ b/openshorts/ml/transcription.py @@ -0,0 +1,46 @@ +"""faster-whisper transcription: CPU-optimized (INT8 quantization) with word timestamps.""" + + +def transcribe_video(video_path): + print("🎙️ Transcribing video with Faster-Whisper (CPU Optimized)...") + from faster_whisper import WhisperModel + + # Run on CPU with INT8 quantization for speed + 
model = WhisperModel("base", device="cpu", compute_type="int8") + + segments, info = model.transcribe(video_path, word_timestamps=True) + + print(f" Detected language '{info.language}' with probability {info.language_probability:.2f}") + + # Convert to openai-whisper compatible format + transcript_segments = [] + full_text = "" + + for segment in segments: + # Print progress to keep user informed (and prevent timeouts feeling) + print(f" [{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}") + + seg_dict = { + 'text': segment.text, + 'start': segment.start, + 'end': segment.end, + 'words': [] + } + + if segment.words: + for word in segment.words: + seg_dict['words'].append({ + 'word': word.word, + 'start': word.start, + 'end': word.end, + 'probability': word.probability + }) + + transcript_segments.append(seg_dict) + full_text += segment.text + " " + + return { + 'text': full_text.strip(), + 'segments': transcript_segments, + 'language': info.language + } diff --git a/openshorts/ml/viral_extraction.py b/openshorts/ml/viral_extraction.py new file mode 100644 index 00000000..23c4e7d6 --- /dev/null +++ b/openshorts/ml/viral_extraction.py @@ -0,0 +1,137 @@ +"""Gemini 2.5 Flash viral-moment extraction: picks 3-15 short clips from a transcript.""" + +import os +import json +from google import genai + + +GEMINI_PROMPT_TEMPLATE = """ +You are a senior short-form video editor. Read the ENTIRE transcript and word-level timestamps to choose the 3–15 MOST VIRAL moments for TikTok/IG Reels/YouTube Shorts. Each clip must be between 15 and 60 seconds long. + +⚠️ FFMPEG TIME CONTRACT — STRICT REQUIREMENTS: +- Return timestamps in ABSOLUTE SECONDS from the start of the video (usable in: ffmpeg -ss <start> -to <end> -i <input> ...). +- Only NUMBERS with decimal point, up to 3 decimals (examples: 0, 1.250, 17.350). +- Ensure 0 ≤ start < end ≤ VIDEO_DURATION_SECONDS. +- Each clip between 15 and 60 s (inclusive). 
+- Prefer starting 0.2–0.4 s BEFORE the hook and ending 0.2–0.4 s AFTER the payoff. +- Use silence moments for natural cuts; never cut in the middle of a word or phrase. +- STRICTLY FORBIDDEN to use time formats other than absolute seconds. + +VIDEO_DURATION_SECONDS: {video_duration} + +TRANSCRIPT_TEXT (raw): +{transcript_text} + +WORDS_JSON (array of {{w, s, e}} where s/e are seconds): +{words_json} + +STRICT EXCLUSIONS: +- No generic intros/outros or purely sponsorship segments unless they contain the hook. +- No clips < 15 s or > 60 s. + +OUTPUT — RETURN ONLY VALID JSON (no markdown, no comments). Order clips by predicted performance (best to worst). In the descriptions, ALWAYS include a CTA like "Follow me and comment X and I'll send you the workflow" (especially if discussing an n8n workflow): +{{ + "shorts": [ + {{ + "start": <number in seconds, e.g., 12.340>, + "end": <number in seconds, e.g., 37.900>, + "video_description_for_tiktok": "<description for TikTok oriented to get views>", + "video_description_for_instagram": "<description for Instagram oriented to get views>", + "video_title_for_youtube_short": "<title for YouTube Short oriented to get views 100 chars max>", + "viral_hook_text": "<SHORT punchy text overlay (max 10 words). MUST BE IN THE SAME LANGUAGE AS THE VIDEO TRANSCRIPT. Examples: 'POV: You realized...', 'Did you know?', 'Stop doing this!'>" + }} + ] +}} +""" + + +def get_viral_clips(transcript_result, video_duration): + print("🤖 Analyzing with Gemini...") + + api_key = os.getenv("GEMINI_API_KEY") + if not api_key: + print("❌ Error: GEMINI_API_KEY not found in environment variables.") + return None + + client = genai.Client(api_key=api_key) + + # We use gemini-2.5-flash as requested. 
+ model_name = 'gemini-2.5-flash' + + print(f"🤖 Initializing Gemini with model: {model_name}") + + # Extract words + words = [] + for segment in transcript_result['segments']: + for word in segment.get('words', []): + words.append({ + 'w': word['word'], + 's': word['start'], + 'e': word['end'] + }) + + prompt = GEMINI_PROMPT_TEMPLATE.format( + video_duration=video_duration, + transcript_text=json.dumps(transcript_result['text']), + words_json=json.dumps(words) + ) + + try: + response = client.models.generate_content( + model=model_name, + contents=prompt + ) + + # --- Cost Calculation --- + try: + usage = response.usage_metadata + if usage: + # Gemini 2.5 Flash Pricing (Dec 2025) + # Input: $0.10 per 1M tokens + # Output: $0.40 per 1M tokens + + input_price_per_million = 0.10 + output_price_per_million = 0.40 + + prompt_tokens = usage.prompt_token_count + output_tokens = usage.candidates_token_count + + input_cost = (prompt_tokens / 1_000_000) * input_price_per_million + output_cost = (output_tokens / 1_000_000) * output_price_per_million + total_cost = input_cost + output_cost + + cost_analysis = { + "input_tokens": prompt_tokens, + "output_tokens": output_tokens, + "input_cost": input_cost, + "output_cost": output_cost, + "total_cost": total_cost, + "model": model_name + } + + print(f"💰 Token Usage ({model_name}):") + print(f" - Input Tokens: {prompt_tokens} (${input_cost:.6f})") + print(f" - Output Tokens: {output_tokens} (${output_cost:.6f})") + print(f" - Total Estimated Cost: ${total_cost:.6f}") + + except Exception as e: + print(f"⚠️ Could not calculate cost: {e}") + cost_analysis = None + # ------------------------ + + # Clean response if it contains markdown code blocks + text = response.text + if text.startswith("```json"): + text = text[7:] + if text.endswith("```"): + text = text[:-3] + text = text.strip() + + result_json = json.loads(text) + if cost_analysis: + result_json['cost_analysis'] = cost_analysis + + return result_json + except Exception as e: 
+ print(f"❌ Gemini Error: {e}") + return None diff --git a/openshorts/video/pipeline.py b/openshorts/video/pipeline.py new file mode 100644 index 00000000..aa861a21 --- /dev/null +++ b/openshorts/video/pipeline.py @@ -0,0 +1,195 @@ +"""process_video_to_vertical orchestrator: scenes -> strategy -> per-frame crop -> mux. + +The hot loop. Calls scene_analysis, ml.detection, tracking, and reframing per frame, +then writes raw BGR24 frames to an FFmpeg encoder via stdin and finally muxes audio. +""" + +import os +import sys +import time +import subprocess + +import cv2 +from tqdm import tqdm + +from openshorts.video.tracking import SmoothedCameraman, SpeakerTracker, ASPECT_RATIO +from openshorts.video.scene_analysis import ( + detect_scenes, + get_video_resolution, + analyze_scenes_strategy, +) +from openshorts.video.reframing import create_general_frame +from openshorts.ml.detection import detect_face_candidates, detect_person_yolo + + +def process_video_to_vertical(input_video, final_output_video): + """ + Core logic to convert horizontal video to vertical using scene detection and Active Speaker Tracking (MediaPipe). + """ + script_start_time = time.time() + + # Define temporary file paths based on the output name + base_name = os.path.splitext(final_output_video)[0] + temp_video_output = f"{base_name}_temp_video.mp4" + temp_audio_output = f"{base_name}_temp_audio.aac" + + # Clean up previous temp files if they exist + if os.path.exists(temp_video_output): os.remove(temp_video_output) + if os.path.exists(temp_audio_output): os.remove(temp_audio_output) + if os.path.exists(final_output_video): os.remove(final_output_video) + + print(f"🎬 Processing clip: {input_video}") + print(" Step 1: Detecting scenes...") + scenes, fps = detect_scenes(input_video) + + if not scenes: + print(" ❌ No scenes were detected. 
Using full video as one scene.") + # If scene detection fails or finds nothing, treat whole video as one scene + cap = cv2.VideoCapture(input_video) + total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + cap.release() + from scenedetect import FrameTimecode + scenes = [(FrameTimecode(0, fps), FrameTimecode(total_frames, fps))] + + print(f" ✅ Found {len(scenes)} scenes.") + + print("\n 🧠 Step 2: Preparing Active Tracking...") + original_width, original_height = get_video_resolution(input_video) + + OUTPUT_HEIGHT = original_height + OUTPUT_WIDTH = int(OUTPUT_HEIGHT * ASPECT_RATIO) + if OUTPUT_WIDTH % 2 != 0: + OUTPUT_WIDTH += 1 + + # Initialize Cameraman + cameraman = SmoothedCameraman(OUTPUT_WIDTH, OUTPUT_HEIGHT, original_width, original_height) + + # --- New Strategy: Per-Scene Analysis --- + print("\n 🤖 Step 3: Analyzing Scenes for Strategy (Single vs Group)...") + scene_strategies = analyze_scenes_strategy(input_video, scenes) + # scene_strategies is a list of 'TRACK' or 'General' corresponding to scenes + + print("\n ✂️ Step 4: Processing video frames...") + + command = [ + 'ffmpeg', '-y', '-f', 'rawvideo', '-vcodec', 'rawvideo', + '-s', f'{OUTPUT_WIDTH}x{OUTPUT_HEIGHT}', '-pix_fmt', 'bgr24', + '-r', str(fps), '-i', '-', '-c:v', 'libx264', + '-preset', 'fast', '-crf', '23', '-an', temp_video_output + ] + + ffmpeg_process = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) + + cap = cv2.VideoCapture(input_video) + total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + + frame_number = 0 + current_scene_index = 0 + + # Pre-calculate scene boundaries + scene_boundaries = [] + for s_start, s_end in scenes: + scene_boundaries.append((s_start.get_frames(), s_end.get_frames())) + + # Global tracker for single-person shots + speaker_tracker = SpeakerTracker(cooldown_frames=30) + + with tqdm(total=total_frames, desc=" Processing", file=sys.stdout) as pbar: + while cap.isOpened(): + ret, frame = cap.read() + if not 
ret: + break + + # Update Scene Index + if current_scene_index < len(scene_boundaries): + start_f, end_f = scene_boundaries[current_scene_index] + if frame_number >= end_f and current_scene_index < len(scene_boundaries) - 1: + current_scene_index += 1 + + # Determine Strategy for current frame based on scene + current_strategy = scene_strategies[current_scene_index] if current_scene_index < len(scene_strategies) else 'TRACK' + + # Apply Strategy + if current_strategy == 'GENERAL': + # "Plano General" -> Blur Background + Fit Width + output_frame = create_general_frame(frame, OUTPUT_WIDTH, OUTPUT_HEIGHT) + + # Reset cameraman/tracker so they don't drift while inactive + cameraman.current_center_x = original_width / 2 + cameraman.target_center_x = original_width / 2 + + else: + # "Single Speaker" -> Track & Crop + + # Detect every 2nd frame for performance + if frame_number % 2 == 0: + candidates = detect_face_candidates(frame) + target_box = speaker_tracker.get_target(candidates, frame_number, original_width) + if target_box: + cameraman.update_target(target_box) + else: + person_box = detect_person_yolo(frame) + if person_box: + cameraman.update_target(person_box) + + # Snap camera on scene change to avoid panning from previous scene position + is_scene_start = (frame_number == scene_boundaries[current_scene_index][0]) + + x1, y1, x2, y2 = cameraman.get_crop_box(force_snap=is_scene_start) + + # Crop + if y2 > y1 and x2 > x1: + cropped = frame[y1:y2, x1:x2] + output_frame = cv2.resize(cropped, (OUTPUT_WIDTH, OUTPUT_HEIGHT)) + else: + output_frame = cv2.resize(frame, (OUTPUT_WIDTH, OUTPUT_HEIGHT)) + + ffmpeg_process.stdin.write(output_frame.tobytes()) + frame_number += 1 + pbar.update(1) + + ffmpeg_process.stdin.close() + stderr_output = ffmpeg_process.stderr.read().decode() + ffmpeg_process.wait() + cap.release() + + if ffmpeg_process.returncode != 0: + print("\n ❌ FFmpeg frame processing failed.") + print(" Stderr:", stderr_output) + return False + + print("\n 🔊 
Step 5: Extracting audio...") + audio_extract_command = [ + 'ffmpeg', '-y', '-i', input_video, '-vn', '-acodec', 'copy', temp_audio_output + ] + try: + subprocess.run(audio_extract_command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) + except subprocess.CalledProcessError: + print("\n ❌ Audio extraction failed (maybe no audio?). Proceeding without audio.") + pass + + print("\n ✨ Step 6: Merging...") + if os.path.exists(temp_audio_output): + merge_command = [ + 'ffmpeg', '-y', '-i', temp_video_output, '-i', temp_audio_output, + '-c:v', 'copy', '-c:a', 'copy', final_output_video + ] + else: + merge_command = [ + 'ffmpeg', '-y', '-i', temp_video_output, + '-c:v', 'copy', final_output_video + ] + + try: + subprocess.run(merge_command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) + print(f" ✅ Clip saved to {final_output_video}") + except subprocess.CalledProcessError as e: + print("\n ❌ Final merge failed.") + print(" Stderr:", e.stderr.decode()) + return False + + # Clean up temp files + if os.path.exists(temp_video_output): os.remove(temp_video_output) + if os.path.exists(temp_audio_output): os.remove(temp_audio_output) + + return True diff --git a/openshorts/video/reframing.py b/openshorts/video/reframing.py new file mode 100644 index 00000000..3b56a1d6 --- /dev/null +++ b/openshorts/video/reframing.py @@ -0,0 +1,42 @@ +"""Vertical reframing helpers: blurred-background 'General Shot' composite.""" + +import cv2 + + +def create_general_frame(frame, output_width, output_height): + """ + Creates a 'General Shot' frame: + - Background: Blurred zoom of original + - Foreground: Original video scaled to fit width, centered vertically. + """ + orig_h, orig_w = frame.shape[:2] + + # 1. 
Background (Fill Height) + # Crop center to aspect ratio + bg_scale = output_height / orig_h + bg_w = int(orig_w * bg_scale) + bg_resized = cv2.resize(frame, (bg_w, output_height)) + + # Crop center of background + start_x = (bg_w - output_width) // 2 + if start_x < 0: start_x = 0 + background = bg_resized[:, start_x:start_x+output_width] + if background.shape[1] != output_width: + background = cv2.resize(background, (output_width, output_height)) + + # Blur background + background = cv2.GaussianBlur(background, (51, 51), 0) + + # 2. Foreground (Fit Width) + scale = output_width / orig_w + fg_h = int(orig_h * scale) + foreground = cv2.resize(frame, (output_width, fg_h)) + + # 3. Overlay + y_offset = (output_height - fg_h) // 2 + + # Clone background to avoid modifying it + final_frame = background.copy() + final_frame[y_offset:y_offset+fg_h, :] = foreground + + return final_frame diff --git a/openshorts/video/scene_analysis.py b/openshorts/video/scene_analysis.py new file mode 100644 index 00000000..7c49d2ab --- /dev/null +++ b/openshorts/video/scene_analysis.py @@ -0,0 +1,77 @@ +"""PySceneDetect scene boundaries + per-scene TRACK/GENERAL strategy analysis.""" + +import cv2 +from scenedetect import open_video, SceneManager +from scenedetect.detectors import ContentDetector +from tqdm import tqdm + +from openshorts.ml.detection import detect_face_candidates + + +def detect_scenes(video_path): + video = open_video(video_path) + scene_manager = SceneManager() + scene_manager.add_detector(ContentDetector()) + scene_manager.detect_scenes(video=video) + scene_list = scene_manager.get_scene_list() + fps = video.frame_rate + return scene_list, fps + + +def get_video_resolution(video_path): + cap = cv2.VideoCapture(video_path) + if not cap.isOpened(): + raise IOError(f"Could not open video file {video_path}") + width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + cap.release() + return width, height + + +def 
analyze_scenes_strategy(video_path, scenes): + """ + Analyzes each scene to determine if it should be TRACK (Single person) or GENERAL (Group/Wide). + Returns list of strategies corresponding to scenes. + """ + cap = cv2.VideoCapture(video_path) + strategies = [] + + if not cap.isOpened(): + return ['TRACK'] * len(scenes) + + for start, end in tqdm(scenes, desc=" Analyzing Scenes"): + # Sample 3 frames (start, middle, end) + frames_to_check = [ + start.get_frames() + 5, + int((start.get_frames() + end.get_frames()) / 2), + end.get_frames() - 5 + ] + + face_counts = [] + for f_idx in frames_to_check: + cap.set(cv2.CAP_PROP_POS_FRAMES, f_idx) + ret, frame = cap.read() + if not ret: continue + + # Detect faces + candidates = detect_face_candidates(frame) + face_counts.append(len(candidates)) + + # Decision Logic + if not face_counts: + avg_faces = 0 + else: + avg_faces = sum(face_counts) / len(face_counts) + + # Strategy: + # 0 faces -> GENERAL (Landscape/B-roll) + # 1 face -> TRACK + # > 1.2 faces -> GENERAL (Group) + + if avg_faces > 1.2 or avg_faces < 0.5: + strategies.append('GENERAL') + else: + strategies.append('TRACK') + + cap.release() + return strategies diff --git a/openshorts/video/tracking.py b/openshorts/video/tracking.py new file mode 100644 index 00000000..7b267d6a --- /dev/null +++ b/openshorts/video/tracking.py @@ -0,0 +1,212 @@ +"""SmoothedCameraman and SpeakerTracker: the heart of stabilized vertical reframing. + +SmoothedCameraman implements the "Heavy Tripod" pan logic — only moves when the +subject leaves the safe zone, slow (3 px/frame) when nearby, fast (15 px/frame) +when the distance is huge. SpeakerTracker prevents rapid speaker switching via +hysteresis (3x score bonus for the active speaker) and a frame-based cooldown. +""" + +ASPECT_RATIO = 9 / 16 + + +class SmoothedCameraman: + """ + Handles smooth camera movement. + Simplified Logic: "Heavy Tripod" + Only moves if the subject leaves the center safe zone. + Moves slowly and linearly. 
+ """ + def __init__(self, output_width, output_height, video_width, video_height): + self.output_width = output_width + self.output_height = output_height + self.video_width = video_width + self.video_height = video_height + + # Initial State + self.current_center_x = video_width / 2 + self.target_center_x = video_width / 2 + + # Calculate crop dimensions once + self.crop_height = video_height + self.crop_width = int(self.crop_height * ASPECT_RATIO) + if self.crop_width > video_width: + self.crop_width = video_width + self.crop_height = int(self.crop_width / ASPECT_RATIO) + + # Safe Zone: 25% of the crop width + # As long as the target is within this zone relative to current center, DO NOT MOVE. + self.safe_zone_radius = self.crop_width * 0.25 + + def update_target(self, face_box): + """ + Updates the target center based on detected face/person. + """ + if face_box: + x, y, w, h = face_box + self.target_center_x = x + w / 2 + + def get_crop_box(self, force_snap=False): + """ + Returns the (x1, y1, x2, y2) for the current frame. + """ + if force_snap: + self.current_center_x = self.target_center_x + else: + diff = self.target_center_x - self.current_center_x + + # SIMPLIFIED LOGIC: + # 1. Is the target outside the safe zone? + if abs(diff) > self.safe_zone_radius: + # 2. 
If yes, move towards it slowly (Linear Speed) + # Determine direction + direction = 1 if diff > 0 else -1 + + # Speed: 3 pixels per frame (Slow pan) + # If the distance is HUGE (scene change or fast movement), speed up to 15 pixels per frame + if abs(diff) > self.crop_width * 0.5: + speed = 15.0 # Fast re-frame + else: + speed = 3.0 # Slow, steady pan + + self.current_center_x += direction * speed + + # Check if we overshot (prevent oscillation) + new_diff = self.target_center_x - self.current_center_x + if (direction == 1 and new_diff < 0) or (direction == -1 and new_diff > 0): + self.current_center_x = self.target_center_x + + # If inside safe zone, DO NOTHING (Stationary Camera) + + # Clamp center + half_crop = self.crop_width / 2 + + if self.current_center_x - half_crop < 0: + self.current_center_x = half_crop + if self.current_center_x + half_crop > self.video_width: + self.current_center_x = self.video_width - half_crop + + x1 = int(self.current_center_x - half_crop) + x2 = int(self.current_center_x + half_crop) + + x1 = max(0, x1) + x2 = min(self.video_width, x2) + + y1 = 0 + y2 = self.video_height + + return x1, y1, x2, y2 + + +class SpeakerTracker: + """ + Tracks speakers over time to prevent rapid switching and handle temporary obstructions. + """ + def __init__(self, stabilization_frames=15, cooldown_frames=30): + self.active_speaker_id = None + self.speaker_scores = {} # {id: score} + self.last_seen = {} # {id: frame_number} + self.locked_counter = 0 # How long we've been locked on current speaker + + # Hyperparameters + self.stabilization_threshold = stabilization_frames # Frames needed to confirm a new speaker + self.switch_cooldown = cooldown_frames # Minimum frames before switching again + self.last_switch_frame = -1000 + + # ID tracking + self.next_id = 0 + self.known_faces = [] # [{'id': 0, 'center': x, 'last_frame': 123}] + + def get_target(self, face_candidates, frame_number, width): + """ + Decides which face to focus on. 
+ face_candidates: list of {'box': [x,y,w,h], 'score': float} + """ + current_candidates = [] + + # 1. Match faces to known IDs (simple distance tracking) + for face in face_candidates: + x, y, w, h = face['box'] + center_x = x + w / 2 + + best_match_id = -1 + min_dist = width * 0.15 # Reduced matching radius to avoid jumping in groups + + # Try to match with known faces seen recently + for kf in self.known_faces: + if frame_number - kf['last_frame'] > 30: # Forgot faces older than 1s (was 2s) + continue + + dist = abs(center_x - kf['center']) + if dist < min_dist: + min_dist = dist + best_match_id = kf['id'] + + # If no match, assign new ID + if best_match_id == -1: + best_match_id = self.next_id + self.next_id += 1 + + # Update known face + self.known_faces = [kf for kf in self.known_faces if kf['id'] != best_match_id] + self.known_faces.append({'id': best_match_id, 'center': center_x, 'last_frame': frame_number}) + + current_candidates.append({ + 'id': best_match_id, + 'box': face['box'], + 'score': face['score'] + }) + + # 2. Update Scores with decay + for pid in list(self.speaker_scores.keys()): + self.speaker_scores[pid] *= 0.85 # Faster decay (was 0.9) + if self.speaker_scores[pid] < 0.1: + del self.speaker_scores[pid] + + # Add new scores + for cand in current_candidates: + pid = cand['id'] + # Score is purely based on size (proximity) now that we don't have mouth + raw_score = cand['score'] / (width * width * 0.05) + self.speaker_scores[pid] = self.speaker_scores.get(pid, 0) + raw_score + + # 3. 
Determine Best Speaker + if not current_candidates: + # If no one found, maintain last active speaker if cooldown allows + # to avoid black screen or jump to 0,0 + return None + + best_candidate = None + max_score = -1 + + for cand in current_candidates: + pid = cand['id'] + total_score = self.speaker_scores.get(pid, 0) + + # Hysteresis: HUGE Bonus for current active speaker + if pid == self.active_speaker_id: + total_score *= 3.0 # Sticky factor + + if total_score > max_score: + max_score = total_score + best_candidate = cand + + # 4. Decide Switch + if best_candidate: + target_id = best_candidate['id'] + + if target_id == self.active_speaker_id: + self.locked_counter += 1 + return best_candidate['box'] + + # New person + if frame_number - self.last_switch_frame < self.switch_cooldown: + old_cand = next((c for c in current_candidates if c['id'] == self.active_speaker_id), None) + if old_cand: + return old_cand['box'] + + self.active_speaker_id = target_id + self.last_switch_frame = frame_number + self.locked_counter = 0 + return best_candidate['box'] + + return None From db5f0d9af0ac982ae453852320463ccf07cef416 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Tue, 19 May 2026 15:57:36 -0400 Subject: [PATCH 10/43] chore(restructure): move saasshorts -> openshorts/saas/pipeline.py Phase 1 step 8: relocate the SaaS UGC pipeline as a single module. The plan calls for an internal split into research / scripting / media / compositing / pipeline; doing that as one move keeps the change small, ships the file into the right folder, and defers the function-level split to a follow-up commit. Shim at saasshorts.py preserves existing `from saasshorts import ...` calls. No direct test coverage for this module; the openapi.json contract still passes. Tests stay 62/62 green. 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- openshorts/saas/pipeline.py | 1474 ++++++++++++++++++++++++++++++++++ saasshorts.py | 1494 +---------------------------------- 2 files changed, 1495 insertions(+), 1473 deletions(-) create mode 100644 openshorts/saas/pipeline.py diff --git a/openshorts/saas/pipeline.py b/openshorts/saas/pipeline.py new file mode 100644 index 00000000..2a00472c --- /dev/null +++ b/openshorts/saas/pipeline.py @@ -0,0 +1,1474 @@ +""" +SaaSShorts: AI-powered UGC video generator for SaaS products. + +Generates viral TikTok/Instagram Reels content from a SaaS URL. +Pipeline: + 1. Scrape & analyze SaaS website (Gemini) + 2. Generate video scripts (hook → problem → solution → CTA) + 3. Generate AI actor portrait (Flux Pro via fal.ai) + 4. Generate voiceover (ElevenLabs TTS) + 5. Generate talking head video (Kling Avatar v2 via fal.ai) + 6. Generate b-roll clips (Kling v2.6 via fal.ai) + 7. Composite final video with subtitles (FFmpeg) +""" + +import os +import re +import json +import time +import subprocess +import httpx +from urllib.parse import urljoin +from typing import Optional, List, Dict, Callable +from concurrent.futures import ThreadPoolExecutor, as_completed + + +ELEVENLABS_API_BASE = "https://api.elevenlabs.io/v1" +FAL_QUEUE_BASE = "https://queue.fal.run" + +# Default ElevenLabs voices (name → voice_id) +DEFAULT_VOICES = { + "Rachel (Female, calm)": "21m00Tcm4TlvDq8ikWAM", + "Drew (Male, confident)": "29vD33N1CtxCmqQRPOHJ", + "Bella (Female, soft)": "EXAVITQu4vr4xnSDxMaL", + "Antoni (Male, warm)": "ErXwobaYiN019PkySvjV", + "Josh (Male, deep)": "TxGEqnHWrfWFTfGW9XjX", + "Sam (Male, raspy)": "yoZ06aMxZJJ28mfd3POQ", +} + + +GEMINI_MODEL = "gemini-3-flash-preview" + + +# ═══════════════════════════════════════════════════════════════════════ +# Phase 1: Website Scraping, Web Research & Analysis +# ═══════════════════════════════════════════════════════════════════════ + +def research_saas_online(url: str, 
gemini_key: str) -> dict: + """ + Use Gemini with Google Search grounding to deeply research a SaaS product + across the internet: reviews, Reddit threads, Twitter, competitor comparisons, + pricing complaints, user testimonials, etc. + """ + from google import genai + from google.genai import types + + print(f"[SaaSShorts] 🔍 Researching {url} across the web (Google Search grounding)...") + + client = genai.Client(api_key=gemini_key) + + # Extract domain name for search queries + domain = url.replace("https://", "").replace("http://", "").split("/")[0] + + prompt = f"""You are a world-class SaaS market researcher. Research this product thoroughly using Google Search. + +Product URL: {url} +Domain: {domain} + +SEARCH AND INVESTIGATE: +1. What does this SaaS product do? (search their website, Product Hunt, G2, Capterra) +2. What are REAL user reviews saying? (G2, Capterra, TrustPilot, Reddit, Twitter/X) +3. What are the most common complaints and pain points users mention? +4. Who are their main competitors and how do they compare? +5. What is their pricing and do users think it's worth it? +6. What is their target market and ideal customer profile? +7. Are there any viral posts, memes, or discussions about this product? +8. What content creators or influencers have talked about them? + +Return a comprehensive JSON research report: +{{ + "product_name": "...", + "website_url": "{url}", + "what_it_does": "Detailed description of the product based on web research", + "target_market": "Who this product is for", + "pricing_info": "Pricing details found online (plans, costs, free tier)", + "user_sentiment": "overall positive/mixed/negative", + "real_reviews": [ + {{"source": "G2/Reddit/Twitter/etc", "quote": "actual user quote or paraphrase", "sentiment": "positive/negative/neutral"}}, + ... 
+ ], + "common_complaints": ["complaint 1 from real users", "complaint 2", ...], + "common_praise": ["what users love 1", "what users love 2", ...], + "competitors": [ + {{"name": "competitor", "comparison": "how they compare"}} + ], + "viral_potential": ["angle 1 based on real discussions", "angle 2", ...], + "key_differentiators": ["what makes them unique based on research"], + "content_angles_from_web": ["angles found from existing content about this product"], + "sources_found": ["list of URLs where information was found"] +}} + +Be thorough. Use REAL data from your search results, not made-up information.""" + + response = client.models.generate_content( + model=GEMINI_MODEL, + contents=[prompt], + config=types.GenerateContentConfig( + tools=[types.Tool(google_search=types.GoogleSearch())], + ), + ) + + # Extract grounding sources + sources = [] + try: + metadata = response.candidates[0].grounding_metadata + if metadata and metadata.grounding_chunks: + for chunk in metadata.grounding_chunks: + if chunk.web: + sources.append({"title": chunk.web.title, "url": chunk.web.uri}) + if metadata and metadata.web_search_queries: + print(f"[SaaSShorts] Searches performed: {metadata.web_search_queries}") + except Exception: + pass + + # Parse response text as JSON + raw = response.text + if not raw: + print("[SaaSShorts] ⚠️ Gemini returned empty response for web research") + return {"raw_research": "", "product_name": domain, "grounding_sources": sources} + + text = raw.strip() + if text.startswith("```"): + text = re.sub(r"^```(?:json)?\n?", "", text) + text = re.sub(r"\n?```$", "", text) + + start = text.find("{") + end = text.rfind("}") + if start != -1 and end != -1: + text = text[start : end + 1] + + try: + research = json.loads(text) + except json.JSONDecodeError: + research = {"raw_research": text, "product_name": domain} + + research["grounding_sources"] = sources + print(f"[SaaSShorts] ✅ Web research complete: {len(sources)} sources found") + return research + + 
+def scrape_website(url: str) -> dict: + """Scrape a SaaS website to extract key content for analysis.""" + from bs4 import BeautifulSoup + + print(f"[SaaSShorts] 🌐 Scraping {url}...") + + headers = { + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" + } + + with httpx.Client(timeout=30.0, follow_redirects=True) as client: + response = client.get(url, headers=headers) + response.raise_for_status() + + soup = BeautifulSoup(response.text, "html.parser") + + # Remove non-content elements + for tag in soup(["script", "style", "nav", "footer", "header", "noscript", "svg", "iframe"]): + tag.decompose() + + # Extract metadata + meta_desc = "" + meta_tag = soup.find("meta", attrs={"name": "description"}) + if meta_tag: + meta_desc = meta_tag.get("content", "") + + og_desc = "" + og_tag = soup.find("meta", attrs={"property": "og:description"}) + if og_tag: + og_desc = og_tag.get("content", "") + + title = soup.title.string.strip() if soup.title and soup.title.string else "" + + # Extract headings + headings = [] + for h in soup.find_all(["h1", "h2", "h3"]): + text = h.get_text(strip=True) + if text and len(text) < 200: + headings.append(text) + + # Main text content + text = soup.get_text(separator="\n", strip=True) + text = re.sub(r"\n{3,}", "\n\n", text) + text = text[:10000] + + # Find subpages to scrape + base_host = httpx.URL(url).host + subpages = set() + for a in soup.find_all("a", href=True): + href = a["href"].lower() + if any(kw in href for kw in ["pricing", "features", "about", "product", "why", "how-it-works", "use-case"]): + try: + full_url = urljoin(url, a["href"]) + full_host = httpx.URL(full_url).host + if base_host and full_host and base_host == full_host: + subpages.add(full_url) + except Exception: + pass + + # Scrape subpages (max 3) + additional = "" + for sub_url in list(subpages)[:3]: + try: + print(f"[SaaSShorts] → Subpage: {sub_url}") + with 
httpx.Client(timeout=20.0, follow_redirects=True) as client: + resp = client.get(sub_url, headers=headers) + if resp.status_code == 200: + sub_soup = BeautifulSoup(resp.text, "html.parser") + for tag in sub_soup(["script", "style", "nav", "footer", "header", "noscript"]): + tag.decompose() + sub_text = sub_soup.get_text(separator="\n", strip=True)[:5000] + additional += f"\n\n--- {sub_url} ---\n{sub_text}" + except Exception as e: + print(f"[SaaSShorts] ⚠️ Failed: {e}") + + result = { + "url": url, + "title": title, + "meta_description": meta_desc or og_desc, + "headings": headings[:20], + "main_content": text, + "additional_pages": additional[:15000], + "pages_scraped": 1 + min(len(subpages), 3), + } + + print(f"[SaaSShorts] ✅ Scraped {result['pages_scraped']} pages, {len(text)} chars") + return result + + +def analyze_saas(scraped_data: dict, gemini_key: str, web_research: dict = None) -> dict: + """ + Deep analysis of a SaaS product combining website scraping + web research. + Uses Gemini 3 Flash for synthesis. 
+ """ + from google import genai + from google.genai import types + + print(f"[SaaSShorts] 🧠 Analyzing {scraped_data['url']} (with web research)...") + + client = genai.Client(api_key=gemini_key) + + # Build web research context + research_context = "" + if web_research: + research_context = f""" +=== WEB RESEARCH (from Google Search) === +Product: {web_research.get('product_name', 'Unknown')} +What it does: {web_research.get('what_it_does', 'N/A')} +Target market: {web_research.get('target_market', 'N/A')} +Pricing: {web_research.get('pricing_info', 'N/A')} +User sentiment: {web_research.get('user_sentiment', 'N/A')} + +Real user reviews: +{json.dumps(web_research.get('real_reviews', [])[:8], indent=2)} + +Common complaints from real users: +{json.dumps(web_research.get('common_complaints', []), indent=2)} + +What users love: +{json.dumps(web_research.get('common_praise', []), indent=2)} + +Competitors: +{json.dumps(web_research.get('competitors', []), indent=2)} + +Viral angles from existing content: +{json.dumps(web_research.get('viral_potential', []), indent=2)} + +Key differentiators: +{json.dumps(web_research.get('key_differentiators', []), indent=2)} + +Content angles found online: +{json.dumps(web_research.get('content_angles_from_web', []), indent=2)} +""" + + prompt = f"""You are an expert SaaS marketing analyst and UGC content strategist. Analyze this SaaS product for creating viral UGC-style marketing videos. + +You have TWO sources of information: +1. The product's OWN WEBSITE (scraped content) +2. EXTERNAL WEB RESEARCH (real reviews, Reddit, competitor analysis, user sentiment from Google Search) + +Combine BOTH to create the most accurate and compelling analysis possible. Prioritize REAL user pain points and sentiments from the web research. 
+ +Website: {scraped_data['url']} +Title: {scraped_data['title']} +Meta: {scraped_data['meta_description']} +Headings: {json.dumps(scraped_data['headings'][:15])} + +=== WEBSITE CONTENT === +{scraped_data['main_content'][:6000]} + +=== ADDITIONAL PAGES === +{scraped_data['additional_pages'][:8000]} +{research_context} + +Return a JSON object: +{{ + "product_name": "Name of the SaaS", + "one_liner": "One-sentence description", + "target_audience": ["audience 1", "audience 2", "audience 3"], + "pain_points": [ + {{"pain": "specific pain point (from real user feedback if available)", "intensity": "high/medium/low", "emotional_trigger": "frustration/fear/time-waste/money-loss/overwhelm", "source": "website/user-reviews/reddit/general"}} + ], + "key_features": ["feature 1", "feature 2", "feature 3"], + "unique_selling_points": ["usp 1", "usp 2"], + "competitors": [ + {{"name": "competitor", "comparison": "how they compare"}} + ], + "pricing_model": "freemium/subscription/one-time/usage-based", + "pricing_details": "specific pricing info if found", + "industry": "category", + "user_sentiment_summary": "what real users think overall", + "emotional_hooks": [ + "Stop wasting X hours on...", + "Your competitors are already using...", + "I wish I knew about this sooner..." + ], + "transformation_story": "Before (with real pain) → After (with product) narrative", + "viral_angles": [ + {{"angle": "description", "platform": "tiktok/instagram/both", "style": "ugc/educational/shock/story", "why_viral": "reason this angle works"}} + ] +}} + +IMPORTANT: Use REAL pain points from user reviews when available. Real frustrations make the best UGC content. 
+Include 5-8 pain points, 4-6 emotional hooks, and 4+ viral angles.""" + + response = client.models.generate_content( + model=GEMINI_MODEL, + contents=[prompt], + config=types.GenerateContentConfig(response_mime_type="application/json"), + ) + + raw = response.text + if not raw: + raise Exception("Gemini returned empty response for SaaS analysis") + + text = raw.strip() + if text.startswith("```"): + text = re.sub(r"^```(?:json)?\n?", "", text) + text = re.sub(r"\n?```$", "", text) + + start = text.find("{") + end = text.rfind("}") + if start != -1 and end != -1: + text = text[start : end + 1] + + try: + analysis = json.loads(text) + except json.JSONDecodeError as e: + raise Exception(f"Failed to parse analysis JSON: {e}\nRaw: {text[:500]}") + + # Attach web research sources for reference + if web_research and web_research.get("grounding_sources"): + analysis["_web_sources"] = web_research["grounding_sources"] + + print(f"[SaaSShorts] ✅ Analysis: {analysis.get('product_name', '?')} ({len(analysis.get('pain_points', []))} pain points)") + return analysis + + +def generate_scripts( + analysis: dict, + gemini_key: str, + num_scripts: int = 3, + style: str = "ugc", + language: str = "en", + actor_gender: str = "female", +) -> list: + """Generate video scripts based on SaaS analysis.""" + from google import genai + from google.genai import types + + lang_name = "Spanish" if language == "es" else "English" + print(f"[SaaSShorts] 📝 Generating {num_scripts} scripts ({style}, {lang_name})...") + + client = genai.Client(api_key=gemini_key) + + style_guide = { + "ugc": "Natural, authentic UGC style. Person talking to camera like sharing a discovery with a friend. Casual, genuine.", + "educational": "Educational style. Clear explanations.", + "shock": "Shock/discovery style. Surprising opener.", + "story": "Storytelling style. 
Mini narrative.", + "comparison": "Before/after comparison.", + } + + lang_instructions = "" + if language == "es": + lang_instructions = """ +LANGUAGE: ALL narrations, subtitles, captions, and hashtags MUST be in SPANISH (Spain/Latin America). +Use natural casual Spanish like a real person would speak on TikTok. Contractions, slang OK. +Examples of Spanish UGC hooks: "Tío, no me puedo creer que nadie me haya dicho esto antes...", "Si usas Excel para esto, necesitas ver esto YA", "Os voy a enseñar algo que me ha cambiado la vida..." +""" + else: + lang_instructions = """ +LANGUAGE: ALL narrations, subtitles, captions, and hashtags MUST be in ENGLISH. +Use natural casual American English like a real person on TikTok. Contractions, slang OK. +Examples of English UGC hooks: "Okay so I just found this tool and...", "Stop doing this manually, there's a better way", "I can't believe nobody told me about this sooner..." +""" + + prompt = f"""You are a viral short-form video scriptwriter for TikTok/Instagram Reels. +Generate {num_scripts} video scripts to promote this product/business. +{lang_instructions} +PRODUCT ANALYSIS: +{json.dumps(analysis, indent=2)} + +STYLE: {style_guide.get(style, style_guide['ugc'])} + +Each script MUST be 20-25 seconds total. NEVER longer than 25 seconds. + +YOU MUST USE EXACTLY THIS 5-SEGMENT STRUCTURE. NO EXCEPTIONS: +1. HOOK (0-5s): type="hook", visual="actor_talking", broll_prompt=null — Avatar says a punchy hook. +2. B-ROLL 1 (5-9s): type="problem", visual="broll", broll_prompt="..." (REQUIRED) — Visual of the problem. +3. BODY (9-16s): type="solution", visual="actor_talking", broll_prompt=null — Avatar presents the solution. +4. B-ROLL 2 (16-21s): type="demo", visual="broll", broll_prompt="..." (REQUIRED) — Visual of the product. +5. CTA (21-25s): type="cta", visual="actor_talking", broll_prompt=null — Avatar says CTA with link in bio. + +CRITICAL — READ CAREFULLY: +- EXACTLY 5 segments. Not 3, not 4, not 6. FIVE. 
+- Segments 2 and 4 MUST have visual="broll" and a non-null broll_prompt string. +- Segments 1, 3, 5 MUST have visual="actor_talking" and broll_prompt=null. +- duration_seconds MUST be between 20 and 25. +- full_narration = all narration text joined together. + +Return a JSON array: +[ + {{ + "title": "Short internal title", + "style": "{style}", + "duration_seconds": 23, + "target_platform": "tiktok", + "hook_text": "Hook overlay text (2-5 words max)", + "segments": [ + {{ + "type": "hook", + "start": 0, + "end": 5, + "narration": "Punchy hook the actor says", + "visual": "actor_talking", + "broll_prompt": null, + "emotion": "excited", + "subtitle_text": "Hook phrase" + }}, + {{ + "type": "problem", + "start": 5, + "end": 9, + "narration": "Voiceover describing the pain point", + "visual": "broll", + "broll_prompt": "REQUIRED: visual of the problem, e.g. person frustrated at laptop, cluttered spreadsheet on screen", + "emotion": "frustrated", + "subtitle_text": "Pain phrase" + }}, + {{ + "type": "solution", + "start": 9, + "end": 16, + "narration": "Actor introduces the product naturally", + "visual": "actor_talking", + "broll_prompt": null, + "emotion": "confident", + "subtitle_text": "Solution phrase" + }}, + {{ + "type": "demo", + "start": 16, + "end": 21, + "narration": "Voiceover showing the product in action", + "visual": "broll", + "broll_prompt": "REQUIRED: visual of the product/result, e.g. clean dashboard with metrics, modern app interface", + "emotion": "excited", + "subtitle_text": "Result phrase" + }}, + {{ + "type": "cta", + "start": 21, + "end": 23, + "narration": "Short CTA mentioning link in bio", + "visual": "actor_talking", + "broll_prompt": null, + "emotion": "confident", + "subtitle_text": "Link in bio" + }} + ], + "full_narration": "All narration text joined (only actor_talking segments)", + "actor_description": "Specific person description: age, gender, ethnicity, hair style, clothing. 
Casual everyday look.", + "hashtags": ["#saas", "#productivity", "#techtools"], + "caption": "Suggested Instagram/TikTok caption" + }} +] + +RULES: +- EXACTLY 5 segments in order: actor, broll, actor, broll, actor +- EXACTLY 2 broll segments with detailed broll_prompt (NOT null) +- full_narration = ALL narration text (both actor and broll voiceover segments joined) +- Total duration MUST be 18-22 seconds, never more +- Keep narrations punchy, conversational, with contractions +- Actor descriptions: casual, real-person look (NOT model/influencer) +- B-roll prompts: cinematic, specific, detailed visual descriptions +- Each script should use a different pain point / angle +- Vary actor demographics across scripts +- CTA MUST always mention "link in bio" / "enlace en la bio". Examples: "Link in bio, go try it", "Check the link in my bio", "El enlace está en la bio, probadlo" +- Write ALL text in {lang_name} +- Actor gender: {actor_gender}. ALL actor_description fields MUST describe a {actor_gender} person. Use diverse ages/ethnicities across scripts. +- IMPORTANT: actor_description MUST ALWAYS be in ENGLISH regardless of script language. Only describe physical appearance: age, gender, ethnicity, hair, clothing. NO actions, NO background, NO scene description. +- Actors must look European, attractive but natural, slightly nerdy/tech vibe. Vary across: blonde, brunette, redhead. Ages 22-35. +- If female: casual summer look (tank top, camisole, simple tee). If male: casual tee or hoodie. 
+- Example female: "a 26 year old attractive european woman, light brown wavy hair, wearing a white tank top, natural minimal makeup, friendly face" +- Example male: "a 29 year old european man, short dark hair, light stubble, wearing a navy t-shirt, smart casual look" """ + + response = client.models.generate_content( + model=GEMINI_MODEL, + contents=[prompt], + config=types.GenerateContentConfig( + response_mime_type="application/json", + max_output_tokens=8192, + ), + ) + + raw = response.text + if not raw: + raise Exception("Gemini returned empty response for script generation") + + text = raw.strip() + if text.startswith("```"): + text = re.sub(r"^```(?:json)?\n?", "", text) + text = re.sub(r"\n?```$", "", text) + + start = text.find("[") + end = text.rfind("]") + if start != -1 and end != -1: + text = text[start : end + 1] + + try: + scripts = json.loads(text) + except json.JSONDecodeError as e: + raise Exception(f"Failed to parse scripts JSON: {e}\nRaw: {text[:500]}") + + print(f"[SaaSShorts] ✅ Generated {len(scripts)} scripts") + return scripts + + +# ═══════════════════════════════════════════════════════════════════════ +# Phase 2: Asset Generation +# ═══════════════════════════════════════════════════════════════════════ + +def _fal_run(model_id: str, input_data: dict, fal_key: str, timeout: int = 600) -> dict: + """ + Submit a job to fal.ai queue, poll for completion, return result. + Uses the URLs returned by the submit response (as per fal.ai docs). 
+ """ + headers = { + "Authorization": f"Key {fal_key}", + "Content-Type": "application/json", + } + + # ── Step 1: Submit to queue ── + submit_url = f"{FAL_QUEUE_BASE}/{model_id}" + print(f"[fal.ai] Submitting to {submit_url}...") + + with httpx.Client(timeout=120.0) as client: + resp = client.post(submit_url, headers=headers, json=input_data) + + if resp.status_code >= 400: + print(f"[fal.ai] Submit error: {resp.text[:500]}") + raise Exception(f"fal.ai error ({resp.status_code}): {resp.text[:300]}") + + try: + submit_data = resp.json() + except json.JSONDecodeError: + raise Exception(f"fal.ai invalid JSON: {resp.text[:300]}") + + request_id = submit_data.get("request_id") + if not request_id: + # Synchronous result (no queue) + return submit_data + + # Use the URLs from the submit response (guaranteed correct per docs) + status_url = submit_data.get("status_url", f"{FAL_QUEUE_BASE}/{model_id}/requests/{request_id}/status") + response_url = submit_data.get("response_url", f"{FAL_QUEUE_BASE}/{model_id}/requests/{request_id}") + + print(f"[fal.ai] Queued: {request_id}") + print(f"[fal.ai] Status URL: {status_url}") + + # ── Step 2: Poll for completion ── + poll_headers = {"Authorization": f"Key {fal_key}"} + start = time.time() + + while time.time() - start < timeout: + elapsed = int(time.time() - start) + try: + with httpx.Client(timeout=30.0) as client: + poll_resp = client.get(f"{status_url}?logs=1", headers=poll_headers) + status_data = poll_resp.json() + except Exception as e: + print(f"[fal.ai] Poll error (retrying): {e}") + time.sleep(5) + continue + + status = status_data.get("status", "UNKNOWN") + + if status == "COMPLETED": + print(f"[fal.ai] ✅ Completed in {elapsed}s! 
Fetching result...") + with httpx.Client(timeout=120.0) as client: + result_resp = client.get(response_url, headers=poll_headers) + return result_resp.json() + + elif status in ("FAILED", "CANCELLED"): + error = status_data.get("error", "unknown error") + raise Exception(f"fal.ai job {status}: {error}") + + # Log progress + queue_pos = status_data.get("queue_position", "") + pos_info = f" (pos: {queue_pos})" if queue_pos != "" else "" + print(f"[fal.ai] {model_id}: {status}{pos_info} ({elapsed}s)") + time.sleep(5) + + raise Exception(f"fal.ai job timed out after {timeout}s for {model_id}") + + +def _fal_upload_file(file_path: str, fal_key: str) -> str: + """Upload a local file to fal.ai CDN storage and return public URL.""" + headers = {"Authorization": f"Key {fal_key}"} + + filename = os.path.basename(file_path) + ext = os.path.splitext(filename)[1].lower() + content_types = { + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".mp3": "audio/mpeg", + ".wav": "audio/wav", + ".mp4": "video/mp4", + ".webp": "image/webp", + } + content_type = content_types.get(ext, "application/octet-stream") + + # Initiate upload + with httpx.Client(timeout=30.0) as client: + resp = client.post( + "https://rest.alpha.fal.ai/storage/upload/initiate", + headers={**headers, "Content-Type": "application/json"}, + json={"file_name": filename, "content_type": content_type}, + ) + resp.raise_for_status() + upload_info = resp.json() + + upload_url = upload_info["upload_url"] + file_url = upload_info["file_url"] + + # Upload file content + with open(file_path, "rb") as f: + file_bytes = f.read() + + with httpx.Client(timeout=120.0) as client: + resp = client.put( + upload_url, + content=file_bytes, + headers={"Content-Type": content_type}, + ) + resp.raise_for_status() + + print(f"[fal.ai] Uploaded {filename} → {file_url}") + return file_url + + +def generate_actor_images( + description: str, fal_key: str, output_dir: str, title_slug: str, num_options: int = 3, + 
product_description: str = None, +) -> List[str]: + """Generate multiple hyper-realistic actor portrait options using Flux 2 Pro.""" + print(f"[SaaSShorts] 🎨 Generating {num_options} actor image options (Flux 2 Pro)...") + + # Clean description: strip scene/actions, keep only physical appearance + clean_desc = description + for remove in ["hablando", "talking", "sentad", "sitting", "desde", "from", "con una", "with a", "detrás", "behind"]: + if remove in clean_desc.lower(): + idx = clean_desc.lower().find(remove) + if idx > 10: + clean_desc = clean_desc[:idx].rstrip(" ,.") + + import random + img_num = random.randint(1000, 9999) + + if product_description: + prompt = f"""IMG_{img_num}.jpg Raw candid selfie of {clean_desc}, casually holding {product_description}, showing it to the camera with a natural smile. Product clearly visible in hand. Casual and real, not an ad. Low quality front camera, soft room lighting. Reddit selfie.""" + else: + prompt = f"""IMG_{img_num}.jpg Raw candid selfie of {clean_desc}, sitting at their desk at home, looking at camera with a relaxed natural smile. Headphones around neck, monitor glow behind them. Not posed, casual and real. Low quality front camera, soft room lighting. 
Reddit selfie.""" + + print(f"[SaaSShorts] Prompt: {prompt[:120]}...{' (with product)' if product_description else ''}") + + paths = [] + # Flux 2 Pro — #1 for photorealistic faces + def _gen_one(i): + result = _fal_run( + "fal-ai/flux-2-pro", + { + "prompt": prompt, + "image_size": "portrait_4_3", + "safety_tolerance": 5, + "seed": random.randint(0, 999999), + }, + fal_key, + timeout=300, + ) + images = result.get("images") or result.get("output", []) + if not images: + raise Exception(f"No images in actor result: {list(result.keys())}") + img_url = images[0]["url"] if isinstance(images[0], dict) else images[0] + img_path = os.path.join(output_dir, f"{title_slug}_actor_option_{i}.png") + with httpx.Client(timeout=60.0) as client: + img_resp = client.get(img_url) + with open(img_path, "wb") as f: + f.write(img_resp.content) + print(f"[SaaSShorts] ✅ Actor option {i+1}: {img_path}") + return img_path + + with ThreadPoolExecutor(max_workers=num_options) as executor: + futures = [executor.submit(_gen_one, i) for i in range(num_options)] + for future in as_completed(futures): + paths.append(future.result()) + + return sorted(paths) + + paths = [] + for i, img in enumerate(result.get("images", [])): + img_path = os.path.join(output_dir, f"{title_slug}_actor_option_{i}.png") + with httpx.Client(timeout=60.0) as client: + img_resp = client.get(img["url"]) + with open(img_path, "wb") as f: + f.write(img_resp.content) + paths.append(img_path) + print(f"[SaaSShorts] ✅ Actor option {i+1}: {img_path}") + + return paths + + +def generate_actor_image( + description: str, fal_key: str, output_path: str +) -> str: + """Generate a single actor image using Recraft V4.""" + output_dir = os.path.dirname(output_path) + title_slug = os.path.basename(output_path).replace("_actor.png", "") + paths = generate_actor_images(description, fal_key, output_dir, title_slug, num_options=1) + if paths: + import shutil + shutil.move(paths[0], output_path) + return output_path + + +def 
generate_voiceover( + text: str, + elevenlabs_key: str, + output_path: str, + voice_id: str = "21m00Tcm4TlvDq8ikWAM", +) -> str: + """Generate voiceover audio using ElevenLabs TTS.""" + print(f"[SaaSShorts] 🎙️ Generating voiceover ({len(text)} chars)...") + + url = f"{ELEVENLABS_API_BASE}/text-to-speech/{voice_id}" + + headers = { + "xi-api-key": elevenlabs_key, + "Content-Type": "application/json", + } + + body = { + "text": text, + "model_id": "eleven_multilingual_v2", + "voice_settings": { + "stability": 0.5, + "similarity_boost": 0.75, + "style": 0.4, + "use_speaker_boost": True, + }, + } + + with httpx.Client(timeout=120.0) as client: + resp = client.post(url, headers=headers, json=body) + if resp.status_code != 200: + raise Exception(f"ElevenLabs TTS error ({resp.status_code}): {resp.text}") + + with open(output_path, "wb") as f: + f.write(resp.content) + + print(f"[SaaSShorts] ✅ Voiceover: {output_path}") + return output_path + + +def get_elevenlabs_voices(elevenlabs_key: str) -> list: + """Fetch available voices from ElevenLabs.""" + url = f"{ELEVENLABS_API_BASE}/voices" + headers = {"xi-api-key": elevenlabs_key} + + with httpx.Client(timeout=15.0) as client: + resp = client.get(url, headers=headers) + if resp.status_code != 200: + return [] + data = resp.json() + + voices = [] + for v in data.get("voices", []): + voices.append({ + "voice_id": v["voice_id"], + "name": v["name"], + "category": v.get("category", ""), + "labels": v.get("labels", {}), + "preview_url": v.get("preview_url", ""), + }) + + return voices + + +# ═══════════════════════════════════════════════════════════════════════ +# Phase 3: Video Generation +# ═══════════════════════════════════════════════════════════════════════ + +def generate_talking_head( + image_path: str, + audio_path: str, + fal_key: str, + output_path: str, +) -> str: + """Generate talking head video using Kling Avatar v2 Standard on fal.ai.""" + print(f"[SaaSShorts] 🗣️ Generating talking head (Kling Avatar v2)...") + + 
# Upload image and audio to fal.ai CDN + image_url = _fal_upload_file(image_path, fal_key) + audio_url = _fal_upload_file(audio_path, fal_key) + + result = _fal_run( + "fal-ai/kling-video/ai-avatar/v2/standard", + { + "image_url": image_url, + "audio_url": audio_url, + "prompt": ( + "Natural UGC creator talking to camera. Expressive and energetic. " + "Subtle hand gestures to emphasize points. Slight head movements and nods. " + "Occasional leaning forward for emphasis. Relaxed shoulders, casual vibe. " + "Maintain eye contact with camera. Natural blinking and micro-expressions." + ), + }, + fal_key, + timeout=600, + ) + + video_url = result["video"]["url"] + + # Download video + with httpx.Client(timeout=180.0) as client: + vid_resp = client.get(video_url) + with open(output_path, "wb") as f: + f.write(vid_resp.content) + + print(f"[SaaSShorts] ✅ Talking head: {output_path}") + return output_path + + +def generate_talking_head_lowcost( + image_path: str, + audio_path: str, + fal_key: str, + output_path: str, +) -> str: + """ + Low-cost talking head: Hailuo 2.3 Fast img2video → VEED Lipsync. + ~$0.39 vs ~$1.69 for Kling Avatar v2. + """ + print(f"[SaaSShorts] 🗣️ Generating talking head (Low Cost: Hailuo + VEED Lipsync)...") + + # Step 1: Generate 6s video from image using MiniMax Hailuo 2.3 Fast ($0.19) + # Cache the Hailuo clip so retries don't re-generate it + hailuo_cache_path = output_path.replace(".mp4", "_hailuo_cache.mp4") + + if os.path.exists(hailuo_cache_path) and os.path.getsize(hailuo_cache_path) > 0: + print(f"[SaaSShorts] Hailuo clip cached, skipping generation.") + hailuo_video_url = _fal_upload_file(hailuo_cache_path, fal_key) + else: + image_url = _fal_upload_file(image_path, fal_key) + + hailuo_result = _fal_run( + "fal-ai/minimax/hailuo-2.3-fast/standard/image-to-video", + { + "image_url": image_url, + "prompt": ( + "Person talking to camera, subtle head nods and natural micro-expressions. " + "Gentle head movement, slight shoulder sway. 
Eye contact with camera. " + "Natural blinking. Soft ambient lighting. Smooth cinematic motion." + ), + }, + fal_key, + timeout=300, + ) + + print(f"[SaaSShorts] Hailuo response keys: {list(hailuo_result.keys())}") + if "video" in hailuo_result: + hailuo_video_url = hailuo_result["video"]["url"] if isinstance(hailuo_result["video"], dict) else hailuo_result["video"] + elif "video_url" in hailuo_result: + hailuo_video_url = hailuo_result["video_url"] + elif "output" in hailuo_result: + hailuo_video_url = hailuo_result["output"]["url"] if isinstance(hailuo_result["output"], dict) else hailuo_result["output"] + else: + raise Exception(f"No video in Hailuo result: {hailuo_result}") + + # Save Hailuo clip locally for retry cache + with httpx.Client(timeout=180.0) as client: + vid_resp = client.get(hailuo_video_url) + with open(hailuo_cache_path, "wb") as f: + f.write(vid_resp.content) + + print(f"[SaaSShorts] Hailuo 2.3 Fast 6s clip ready (cached for retry).") + + # Step 2: Upload audio for lip-sync + audio_url = _fal_upload_file(audio_path, fal_key) + + # Step 3: VEED Lipsync — high quality lip-sync with loop ($0.20 for 30s) + lipsync_result = _fal_run( + "veed/lipsync", + { + "video_url": hailuo_video_url, + "audio_url": audio_url, + }, + fal_key, + timeout=900, + ) + + print(f"[SaaSShorts] VEED Lipsync response keys: {list(lipsync_result.keys())}") + if "video" in lipsync_result: + lipsync_video_url = lipsync_result["video"]["url"] if isinstance(lipsync_result["video"], dict) else lipsync_result["video"] + else: + raise Exception(f"No video in VEED Lipsync result: {lipsync_result}") + + with httpx.Client(timeout=180.0) as client: + vid_resp = client.get(lipsync_video_url) + with open(output_path, "wb") as f: + f.write(vid_resp.content) + + print(f"[SaaSShorts] ✅ Talking head (low cost): {output_path}") + return output_path + + +def generate_broll( + prompt: str, fal_key: str, output_path: str, duration: str = "5" +) -> str: + """ + Generate b-roll: Recraft V4 image + 
Ken Burns zoom effect via FFmpeg. + """ + print(f"[SaaSShorts] 🎬 Generating b-roll image + Ken Burns effect...") + + dur_secs = int(duration) + img_path = output_path.replace(".mp4", "_img.png") + + # Step 1: Generate a high-quality still image with Flux 2 Pro + result = _fal_run( + "fal-ai/flux-2-pro", + { + "prompt": f"{prompt}. Cinematic, shallow depth of field, professional photography.", + "image_size": "portrait_4_3", + "safety_tolerance": 5, + }, + fal_key, + timeout=300, + ) + + # Flux 2 Pro returns images in "images" or "output" key + images = result.get("images") or result.get("output", []) + if not images: + raise Exception(f"No images in b-roll result: {list(result.keys())}") + img_url = images[0]["url"] if isinstance(images[0], dict) else images[0] + + with httpx.Client(timeout=60.0) as client: + img_resp = client.get(img_url) + with open(img_path, "wb") as f: + f.write(img_resp.content) + + # Step 2: Ken Burns effect — slow zoom in with slight pan + fps = 30 + total_frames = dur_secs * fps + # Zoom from 1.0x to 1.15x over duration (subtle, cinematic) + zoompan_filter = ( + f"scale=2160:3840," + f"zoompan=z='1+0.15*on/{total_frames}':" + f"x='iw/2-(iw/zoom/2)+10*on/{total_frames}':" + f"y='ih/2-(ih/zoom/2)':" + f"d={total_frames}:s=1080x1920:fps={fps}," + f"setsar=1" + ) + cmd = [ + "ffmpeg", "-y", + "-loop", "1", "-i", img_path, # Input 0: image + "-f", "lavfi", "-i", "anullsrc=r=44100:cl=stereo", # Input 1: silent audio + "-vf", zoompan_filter, + "-t", str(dur_secs), + "-map", "0:v", "-map", "1:a", + "-c:v", "libx264", "-preset", "fast", "-crf", "22", + "-pix_fmt", "yuv420p", + "-c:a", "aac", "-b:a", "128k", + "-shortest", + output_path, + ] + + subprocess.run(cmd, check=True, capture_output=True) + + # Cleanup temp image + if os.path.exists(img_path): + os.remove(img_path) + + print(f"[SaaSShorts] ✅ B-roll (Ken Burns): {output_path}") + return output_path + + +# ═══════════════════════════════════════════════════════════════════════ +# Phase 4: 
Compositing (FFmpeg) +# ═══════════════════════════════════════════════════════════════════════ + +def _get_media_duration(path: str) -> float: + """Get duration of a media file using ffprobe.""" + cmd = [ + "ffprobe", "-v", "error", + "-show_entries", "format=duration", + "-of", "default=noprint_wrappers=1:nokey=1", + path, + ] + try: + result = subprocess.run(cmd, capture_output=True, text=True) + output = result.stdout.strip() + if output: + return float(output) + except Exception as e: + print(f"[SaaSShorts] ⚠️ ffprobe failed for {path}: {e}") + return 30.0 # Fallback to 30s estimate + + +def _format_ass_time(seconds: float) -> str: + """Format time for ASS subtitle format: H:MM:SS.cc""" + h = int(seconds // 3600) + m = int((seconds % 3600) // 60) + s = int(seconds % 60) + cs = int((seconds - int(seconds)) * 100) + return f"{h}:{m:02d}:{s:02d}.{cs:02d}" + + +def transcribe_audio_for_subs(audio_path: str) -> list: + """ + Transcribe audio with word-level timestamps using faster-whisper. + Returns list of {"word": str, "start": float, "end": float}. + """ + from faster_whisper import WhisperModel + + print(f"[SaaSShorts] 🎙️ Transcribing audio for subtitles...") + model = WhisperModel("base", device="cpu", compute_type="int8") + segments, info = model.transcribe(audio_path, word_timestamps=True) + + words = [] + for segment in segments: + if segment.words: + for w in segment.words: + words.append({ + "word": w.word.strip(), + "start": w.start, + "end": w.end, + }) + + print(f"[SaaSShorts] ✅ Transcribed {len(words)} words") + return words + + +def generate_tiktok_subs(audio_path: str, output_path: str, max_words: int = 3) -> str: + """ + Generate TikTok-style ASS subtitles from audio using Whisper transcription. + + Style: Big bold centered text, 1-3 words at a time, white with black outline. + Matches actual spoken words with precise timestamps. 
+ """ + words = transcribe_audio_for_subs(audio_path) + if not words: + # Fallback: empty subtitle file + with open(output_path, "w") as f: + f.write("") + return output_path + + # Group words into chunks of max_words + chunks = [] + for i in range(0, len(words), max_words): + group = words[i : i + max_words] + text = " ".join(w["word"] for w in group).upper() + start = group[0]["start"] + end = group[-1]["end"] + chunks.append({"text": text, "start": start, "end": end}) + + # Build ASS file with TikTok style + ass_content = """[Script Info] +Title: TikTok Style Subs +ScriptType: v4.00+ +PlayResX: 1080 +PlayResY: 1920 +WrapStyle: 0 + +[V4+ Styles] +Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding +Style: TikTok,Arial Black,90,&H00FFFFFF,&H000000FF,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,4,0,2,40,40,120,1 + +[Events] +Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text +""" + + for chunk in chunks: + start = _format_ass_time(chunk["start"]) + end = _format_ass_time(chunk["end"]) + text = chunk["text"].replace("\n", "\\N") + ass_content += f"Dialogue: 0,{start},{end},TikTok,,0,0,0,,{text}\n" + + with open(output_path, "w", encoding="utf-8") as f: + f.write(ass_content) + + print(f"[SaaSShorts] ✅ TikTok subs: {len(chunks)} captions from {len(words)} words") + return output_path + + +def generate_srt_from_script(segments: list, output_path: str) -> str: + """Fallback: generate basic SRT from script segments (used if no audio available).""" + srt_content = "" + index = 1 + for seg in segments: + text = seg.get("subtitle_text") or seg.get("narration", "") + if not text: + continue + words = text.split() + chunk_size = 3 + start_time = seg["start"] + end_time = seg["end"] + duration = end_time - start_time + chunks = [words[i : i + chunk_size] for i in 
range(0, len(words), chunk_size)] + chunk_dur = duration / max(len(chunks), 1) + for i, chunk in enumerate(chunks): + cs = start_time + i * chunk_dur + ce = min(start_time + (i + 1) * chunk_dur, end_time) + h, m, s, ms = int(cs//3600), int((cs%3600)//60), int(cs%60), int((cs-int(cs))*1000) + h2, m2, s2, ms2 = int(ce//3600), int((ce%3600)//60), int(ce%60), int((ce-int(ce))*1000) + srt_content += f"{index}\n{h:02d}:{m:02d}:{s:02d},{ms:03d} --> {h2:02d}:{m2:02d}:{s2:02d},{ms2:03d}\n{' '.join(chunk).upper()}\n\n" + index += 1 + with open(output_path, "w", encoding="utf-8") as f: + f.write(srt_content) + return output_path + + +def composite_video( + talking_head_path: str, + broll_clips: List[Dict], + srt_path: str, + hook_text: str, + output_path: str, +) -> str: + """ + Composite talking head + b-roll inserts + subtitles into final video. + + broll_clips: [{"path": "/path/to/clip.mp4", "start": 12, "end": 17}] + """ + print(f"[SaaSShorts] 🎞️ Compositing final video...") + + # Determine subtitle filter based on file type + safe_sub = srt_path.replace("\\", "/").replace(":", "\\:") + if srt_path.endswith(".ass"): + # ASS has styles embedded — use ass filter directly + sub_filter = f"ass='{safe_sub}'" + else: + # SRT fallback with TikTok-ish styling + sub_style = ( + "Alignment=2,Fontname=Arial Black,Fontsize=24,PrimaryColour=&H00FFFFFF," + "OutlineColour=&H00000000,BorderStyle=1,Outline=4,Shadow=0,MarginV=120,Bold=-1" + ) + sub_filter = f"subtitles='{safe_sub}':force_style='{sub_style}'" + + if not broll_clips: + # Simple: talking head + subtitles only + cmd = [ + "ffmpeg", "-y", + "-i", talking_head_path, + "-vf", sub_filter, + "-c:v", "libx264", "-preset", "fast", "-crf", "22", + "-c:a", "aac", "-b:a", "128k", + output_path, + ] + subprocess.run(cmd, check=True) + print(f"[SaaSShorts] ✅ Final video (simple): {output_path}") + return output_path + + # Complex: talking head with b-roll inserts + th_duration = _get_media_duration(talking_head_path) + sorted_broll = 
sorted(broll_clips, key=lambda x: x["start"]) + + # Get actual b-roll durations and limit segment lengths + broll_durations = {} + for i, clip in enumerate(sorted_broll): + broll_durations[i] = _get_media_duration(clip["path"]) + print(f"[SaaSShorts] B-roll {i} actual duration: {broll_durations[i]:.1f}s") + + # Build segment list — limit b-roll segments to actual clip duration + segments = [] + prev_end = 0.0 + + for i, clip in enumerate(sorted_broll): + bstart = clip["start"] + actual_dur = broll_durations[i] + # B-roll segment can't be longer than the actual clip + bend = min(clip["end"], bstart + actual_dur) + + if prev_end < bstart: + segments.append({"type": "th", "start": prev_end, "end": bstart}) + + segments.append({ + "type": "broll", + "index": i, + "start": bstart, + "end": bend, + "duration": bend - bstart, + }) + prev_end = bend + + if prev_end < th_duration: + segments.append({"type": "th", "start": prev_end, "end": th_duration}) + + # Build FFmpeg filter_complex + inputs = ["-i", talking_head_path] + for clip in sorted_broll: + inputs.extend(["-i", clip["path"]]) + + filter_parts = [] + concat_parts = [] + + # Normalize all segments to same resolution and fps for concat + norm = "scale=1080:1920:force_original_aspect_ratio=decrease,pad=1080:1920:(ow-iw)/2:(oh-ih)/2,fps=30,setsar=1" + + for j, seg in enumerate(segments): + if seg["type"] == "th": + filter_parts.append( + f"[0:v]trim=start={seg['start']:.3f}:end={seg['end']:.3f},setpts=PTS-STARTPTS,{norm}[tv{j}]" + ) + filter_parts.append( + f"[0:a]atrim=start={seg['start']:.3f}:end={seg['end']:.3f},asetpts=PTS-STARTPTS[ta{j}]" + ) + concat_parts.append(f"[tv{j}][ta{j}]") + else: + idx = seg["index"] + 1 + dur = seg["duration"] + filter_parts.append( + f"[{idx}:v]trim=start=0:end={dur:.3f},setpts=PTS-STARTPTS,{norm}[bv{j}]" + ) + filter_parts.append( + f"[0:a]atrim=start={seg['start']:.3f}:end={seg['end']:.3f},asetpts=PTS-STARTPTS[ba{j}]" + ) + concat_parts.append(f"[bv{j}][ba{j}]") + + n = 
len(segments) + filter_parts.append( + f"{''.join(concat_parts)}concat=n={n}:v=1:a=1[outv][outa]" + ) + filter_parts.append( + f"[outv]{sub_filter}[finalv]" + ) + + filter_str = ";".join(filter_parts) + + cmd = [ + "ffmpeg", "-y", + *inputs, + "-filter_complex", filter_str, + "-map", "[finalv]", + "-map", "[outa]", + "-c:v", "libx264", "-preset", "fast", "-crf", "22", + "-c:a", "aac", "-b:a", "128k", + output_path, + ] + + subprocess.run(cmd, check=True) + print(f"[SaaSShorts] ✅ Final video (composite): {output_path}") + return output_path + + +# ═══════════════════════════════════════════════════════════════════════ +# Orchestrator: Full Pipeline +# ═══════════════════════════════════════════════════════════════════════ + +def generate_full_video( + script: dict, + config: dict, + output_dir: str, + log: Callable[[str], None] = print, +) -> dict: + """ + Full SaaSShorts video generation pipeline. + + Args: + script: A single script object from generate_scripts() + config: { + "fal_key": str, + "elevenlabs_key": str, + "voice_id": str (optional), + "actor_description": str (optional, overrides script), + } + output_dir: Directory to write output files + log: Callback for progress logging + + Returns: + {"video_path": str, "srt_path": str, "actor_image": str, "cost_estimate": dict} + """ + os.makedirs(output_dir, exist_ok=True) + + fal_key = config["fal_key"] + elevenlabs_key = config["elevenlabs_key"] + voice_id = config.get("voice_id", "21m00Tcm4TlvDq8ikWAM") + actor_desc = config.get("actor_description") or script.get("actor_description", "a young professional in their late 20s, wearing a casual modern outfit, clean background") + + title_slug = re.sub(r"[^a-z0-9]+", "_", script.get("title", "video").lower())[:30] + + # Paths + actor_img = os.path.join(output_dir, f"{title_slug}_actor.png") + audio_path = os.path.join(output_dir, f"{title_slug}_voice.mp3") + talking_head = os.path.join(output_dir, f"{title_slug}_head.mp4") + srt_path = os.path.join(output_dir, 
f"{title_slug}_subs.ass") + final_path = os.path.join(output_dir, f"{title_slug}_final.mp4") + + full_narration = script.get("full_narration", "") + if not full_narration: + full_narration = " ".join( + seg.get("narration", "") for seg in script.get("segments", []) + ) + + def _exists(path): + return os.path.exists(path) and os.path.getsize(path) > 0 + + # ── Step 1 & 2: Generate actor image + voiceover in parallel ── + # If user pre-selected an actor image, copy it + selected_actor = config.get("selected_actor_path") + if selected_actor and os.path.exists(selected_actor) and not _exists(actor_img): + import shutil + shutil.copy2(selected_actor, actor_img) + log("[1/6] Using pre-selected actor image.") + + need_img = not _exists(actor_img) + need_voice = not _exists(audio_path) + + if need_img or need_voice: + tasks = [] + if need_img: + tasks.append("actor image") + if need_voice: + tasks.append("voiceover") + log(f"[1/6] Generating {' + '.join(tasks)} (parallel)...") + + with ThreadPoolExecutor(max_workers=2) as executor: + future_img = executor.submit(generate_actor_image, actor_desc, fal_key, actor_img) if need_img else None + future_voice = executor.submit( + generate_voiceover, full_narration, elevenlabs_key, audio_path, voice_id + ) if need_voice else None + + if future_img: + actor_img = future_img.result() + if future_voice: + audio_path = future_voice.result() + + log("[2/6] Actor image and voiceover ready.") + else: + log("[1/6] Actor image and voiceover cached, skipping.") + log("[2/6] ✅ Using cached assets.") + + # ── Step 3: Generate talking head ── + video_mode = config.get("video_mode", "premium") + if not _exists(talking_head): + if video_mode == "lowcost": + log("[3/6] Generating talking head (Low Cost: Hailuo + VEED Lipsync)... This takes 2-5 minutes.") + talking_head = generate_talking_head_lowcost(actor_img, audio_path, fal_key, talking_head) + else: + log("[3/6] Generating talking head video (Kling Avatar v2)... 
This takes 2-5 minutes.") + talking_head = generate_talking_head(actor_img, audio_path, fal_key, talking_head) + log("[3/6] Talking head ready.") + else: + log("[3/6] ✅ Talking head cached, skipping.") + + # ── Step 4: Generate b-roll clips ── + broll_segments = [ + seg for seg in script.get("segments", []) + if seg.get("broll_prompt") and seg.get("visual") == "broll" + ] + + broll_clips = [] + if broll_segments: + # Check which b-roll clips need generating + broll_to_generate = [] + for i, seg in enumerate(broll_segments): + broll_path = os.path.join(output_dir, f"{title_slug}_broll_{i}.mp4") + if _exists(broll_path): + broll_clips.append({ + "path": broll_path, + "start": seg["start"], + "end": seg["end"], + }) + log(f" ✅ B-roll {i} cached, skipping.") + else: + broll_to_generate.append((i, seg, broll_path)) + + if broll_to_generate: + log(f"[4/6] Generating {len(broll_to_generate)} b-roll clips...") + with ThreadPoolExecutor(max_workers=3) as executor: + futures = {} + for i, seg, broll_path in broll_to_generate: + future = executor.submit( + generate_broll, seg["broll_prompt"], fal_key, broll_path + ) + futures[future] = {"seg": seg, "path": broll_path} + + for future in as_completed(futures): + info = futures[future] + try: + path = future.result() + broll_clips.append({ + "path": path, + "start": info["seg"]["start"], + "end": info["seg"]["end"], + }) + log(f" ✅ B-roll clip ready: {os.path.basename(path)}") + except Exception as e: + log(f" ⚠️ B-roll failed (skipping): {e}") + else: + log("[4/6] ✅ All b-roll cached, skipping.") + else: + log("[4/6] No b-roll segments in script, skipping.") + + # ── Step 5: Generate subtitles (from actual audio, not script text) ── + log("[5/6] Transcribing audio and generating TikTok-style subtitles...") + generate_tiktok_subs(audio_path, srt_path, max_words=2) + + # ── Step 6: Composite final video ── + log("[6/6] Compositing final video with FFmpeg...") + hook_text = script.get("hook_text", "") + 
composite_video(talking_head, broll_clips, srt_path, hook_text, final_path) + + log("🎉 Video generation complete!") + + # Cost estimate + audio_duration = _get_media_duration(audio_path) + if video_mode == "lowcost": + cost = { + "actor_image_flux": 0.05, + "voiceover_elevenlabs": round(len(full_narration) * 0.00003, 3), + "hailuo_img2video": 0.19, + "veed_lipsync": 0.20, + "broll_flux": round(len(broll_clips) * 0.05, 2), + "ffmpeg_compositing": 0.00, + } + else: + cost = { + "actor_image_flux": 0.05, + "voiceover_elevenlabs": round(len(full_narration) * 0.00003, 3), + "talking_head_kling": round(audio_duration * 0.056, 2), + "broll_kling": round(len(broll_clips) * 5 * 0.07, 2), + "ffmpeg_compositing": 0.00, + } + cost["total"] = round(sum(cost.values()), 2) + + return { + "video_path": final_path, + "video_filename": os.path.basename(final_path), + "srt_path": srt_path, + "actor_image": actor_img, + "duration": audio_duration, + "cost_estimate": cost, + } diff --git a/saasshorts.py b/saasshorts.py index 2a00472c..626ad169 100644 --- a/saasshorts.py +++ b/saasshorts.py @@ -1,1474 +1,22 @@ +"""Compat shim: re-exports openshorts.saas.pipeline at the original import path. + +The SaaS UGC pipeline moved to openshorts/saas/pipeline.py as part of the +restructure. A future commit may split it further into research / scripting / +media / compositing / pipeline modules per the plan; for now it lives as a +single module in the saas/ folder. New code should import from +`openshorts.saas.pipeline` directly; this shim keeps existing +`from saasshorts import ...` calls working. """ -SaaSShorts: AI-powered UGC video generator for SaaS products. - -Generates viral TikTok/Instagram Reels content from a SaaS URL. -Pipeline: - 1. Scrape & analyze SaaS website (Gemini) - 2. Generate video scripts (hook → problem → solution → CTA) - 3. Generate AI actor portrait (Flux Pro via fal.ai) - 4. Generate voiceover (ElevenLabs TTS) - 5. 
Generate talking head video (Kling Avatar v2 via fal.ai) - 6. Generate b-roll clips (Kling v2.6 via fal.ai) - 7. Composite final video with subtitles (FFmpeg) -""" - -import os -import re -import json -import time -import subprocess -import httpx -from urllib.parse import urljoin -from typing import Optional, List, Dict, Callable -from concurrent.futures import ThreadPoolExecutor, as_completed - - -ELEVENLABS_API_BASE = "https://api.elevenlabs.io/v1" -FAL_QUEUE_BASE = "https://queue.fal.run" - -# Default ElevenLabs voices (name → voice_id) -DEFAULT_VOICES = { - "Rachel (Female, calm)": "21m00Tcm4TlvDq8ikWAM", - "Drew (Male, confident)": "29vD33N1CtxCmqQRPOHJ", - "Bella (Female, soft)": "EXAVITQu4vr4xnSDxMaL", - "Antoni (Male, warm)": "ErXwobaYiN019PkySvjV", - "Josh (Male, deep)": "TxGEqnHWrfWFTfGW9XjX", - "Sam (Male, raspy)": "yoZ06aMxZJJ28mfd3POQ", -} - - -GEMINI_MODEL = "gemini-3-flash-preview" - - -# ═══════════════════════════════════════════════════════════════════════ -# Phase 1: Website Scraping, Web Research & Analysis -# ═══════════════════════════════════════════════════════════════════════ - -def research_saas_online(url: str, gemini_key: str) -> dict: - """ - Use Gemini with Google Search grounding to deeply research a SaaS product - across the internet: reviews, Reddit threads, Twitter, competitor comparisons, - pricing complaints, user testimonials, etc. - """ - from google import genai - from google.genai import types - - print(f"[SaaSShorts] 🔍 Researching {url} across the web (Google Search grounding)...") - - client = genai.Client(api_key=gemini_key) - - # Extract domain name for search queries - domain = url.replace("https://", "").replace("http://", "").split("/")[0] - - prompt = f"""You are a world-class SaaS market researcher. Research this product thoroughly using Google Search. - -Product URL: {url} -Domain: {domain} - -SEARCH AND INVESTIGATE: -1. What does this SaaS product do? (search their website, Product Hunt, G2, Capterra) -2. 
What are REAL user reviews saying? (G2, Capterra, TrustPilot, Reddit, Twitter/X) -3. What are the most common complaints and pain points users mention? -4. Who are their main competitors and how do they compare? -5. What is their pricing and do users think it's worth it? -6. What is their target market and ideal customer profile? -7. Are there any viral posts, memes, or discussions about this product? -8. What content creators or influencers have talked about them? - -Return a comprehensive JSON research report: -{{ - "product_name": "...", - "website_url": "{url}", - "what_it_does": "Detailed description of the product based on web research", - "target_market": "Who this product is for", - "pricing_info": "Pricing details found online (plans, costs, free tier)", - "user_sentiment": "overall positive/mixed/negative", - "real_reviews": [ - {{"source": "G2/Reddit/Twitter/etc", "quote": "actual user quote or paraphrase", "sentiment": "positive/negative/neutral"}}, - ... - ], - "common_complaints": ["complaint 1 from real users", "complaint 2", ...], - "common_praise": ["what users love 1", "what users love 2", ...], - "competitors": [ - {{"name": "competitor", "comparison": "how they compare"}} - ], - "viral_potential": ["angle 1 based on real discussions", "angle 2", ...], - "key_differentiators": ["what makes them unique based on research"], - "content_angles_from_web": ["angles found from existing content about this product"], - "sources_found": ["list of URLs where information was found"] -}} - -Be thorough. 
Use REAL data from your search results, not made-up information.""" - - response = client.models.generate_content( - model=GEMINI_MODEL, - contents=[prompt], - config=types.GenerateContentConfig( - tools=[types.Tool(google_search=types.GoogleSearch())], - ), - ) - - # Extract grounding sources - sources = [] - try: - metadata = response.candidates[0].grounding_metadata - if metadata and metadata.grounding_chunks: - for chunk in metadata.grounding_chunks: - if chunk.web: - sources.append({"title": chunk.web.title, "url": chunk.web.uri}) - if metadata and metadata.web_search_queries: - print(f"[SaaSShorts] Searches performed: {metadata.web_search_queries}") - except Exception: - pass - - # Parse response text as JSON - raw = response.text - if not raw: - print("[SaaSShorts] ⚠️ Gemini returned empty response for web research") - return {"raw_research": "", "product_name": domain, "grounding_sources": sources} - - text = raw.strip() - if text.startswith("```"): - text = re.sub(r"^```(?:json)?\n?", "", text) - text = re.sub(r"\n?```$", "", text) - - start = text.find("{") - end = text.rfind("}") - if start != -1 and end != -1: - text = text[start : end + 1] - - try: - research = json.loads(text) - except json.JSONDecodeError: - research = {"raw_research": text, "product_name": domain} - - research["grounding_sources"] = sources - print(f"[SaaSShorts] ✅ Web research complete: {len(sources)} sources found") - return research - - -def scrape_website(url: str) -> dict: - """Scrape a SaaS website to extract key content for analysis.""" - from bs4 import BeautifulSoup - - print(f"[SaaSShorts] 🌐 Scraping {url}...") - - headers = { - "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" - } - - with httpx.Client(timeout=30.0, follow_redirects=True) as client: - response = client.get(url, headers=headers) - response.raise_for_status() - - soup = BeautifulSoup(response.text, "html.parser") - - # 
Remove non-content elements - for tag in soup(["script", "style", "nav", "footer", "header", "noscript", "svg", "iframe"]): - tag.decompose() - - # Extract metadata - meta_desc = "" - meta_tag = soup.find("meta", attrs={"name": "description"}) - if meta_tag: - meta_desc = meta_tag.get("content", "") - - og_desc = "" - og_tag = soup.find("meta", attrs={"property": "og:description"}) - if og_tag: - og_desc = og_tag.get("content", "") - - title = soup.title.string.strip() if soup.title and soup.title.string else "" - - # Extract headings - headings = [] - for h in soup.find_all(["h1", "h2", "h3"]): - text = h.get_text(strip=True) - if text and len(text) < 200: - headings.append(text) - - # Main text content - text = soup.get_text(separator="\n", strip=True) - text = re.sub(r"\n{3,}", "\n\n", text) - text = text[:10000] - - # Find subpages to scrape - base_host = httpx.URL(url).host - subpages = set() - for a in soup.find_all("a", href=True): - href = a["href"].lower() - if any(kw in href for kw in ["pricing", "features", "about", "product", "why", "how-it-works", "use-case"]): - try: - full_url = urljoin(url, a["href"]) - full_host = httpx.URL(full_url).host - if base_host and full_host and base_host == full_host: - subpages.add(full_url) - except Exception: - pass - - # Scrape subpages (max 3) - additional = "" - for sub_url in list(subpages)[:3]: - try: - print(f"[SaaSShorts] → Subpage: {sub_url}") - with httpx.Client(timeout=20.0, follow_redirects=True) as client: - resp = client.get(sub_url, headers=headers) - if resp.status_code == 200: - sub_soup = BeautifulSoup(resp.text, "html.parser") - for tag in sub_soup(["script", "style", "nav", "footer", "header", "noscript"]): - tag.decompose() - sub_text = sub_soup.get_text(separator="\n", strip=True)[:5000] - additional += f"\n\n--- {sub_url} ---\n{sub_text}" - except Exception as e: - print(f"[SaaSShorts] ⚠️ Failed: {e}") - - result = { - "url": url, - "title": title, - "meta_description": meta_desc or og_desc, - 
"headings": headings[:20], - "main_content": text, - "additional_pages": additional[:15000], - "pages_scraped": 1 + min(len(subpages), 3), - } - - print(f"[SaaSShorts] ✅ Scraped {result['pages_scraped']} pages, {len(text)} chars") - return result - - -def analyze_saas(scraped_data: dict, gemini_key: str, web_research: dict = None) -> dict: - """ - Deep analysis of a SaaS product combining website scraping + web research. - Uses Gemini 3 Flash for synthesis. - """ - from google import genai - from google.genai import types - - print(f"[SaaSShorts] 🧠 Analyzing {scraped_data['url']} (with web research)...") - - client = genai.Client(api_key=gemini_key) - - # Build web research context - research_context = "" - if web_research: - research_context = f""" -=== WEB RESEARCH (from Google Search) === -Product: {web_research.get('product_name', 'Unknown')} -What it does: {web_research.get('what_it_does', 'N/A')} -Target market: {web_research.get('target_market', 'N/A')} -Pricing: {web_research.get('pricing_info', 'N/A')} -User sentiment: {web_research.get('user_sentiment', 'N/A')} - -Real user reviews: -{json.dumps(web_research.get('real_reviews', [])[:8], indent=2)} - -Common complaints from real users: -{json.dumps(web_research.get('common_complaints', []), indent=2)} - -What users love: -{json.dumps(web_research.get('common_praise', []), indent=2)} - -Competitors: -{json.dumps(web_research.get('competitors', []), indent=2)} - -Viral angles from existing content: -{json.dumps(web_research.get('viral_potential', []), indent=2)} - -Key differentiators: -{json.dumps(web_research.get('key_differentiators', []), indent=2)} - -Content angles found online: -{json.dumps(web_research.get('content_angles_from_web', []), indent=2)} -""" - - prompt = f"""You are an expert SaaS marketing analyst and UGC content strategist. Analyze this SaaS product for creating viral UGC-style marketing videos. - -You have TWO sources of information: -1. The product's OWN WEBSITE (scraped content) -2. 
EXTERNAL WEB RESEARCH (real reviews, Reddit, competitor analysis, user sentiment from Google Search) - -Combine BOTH to create the most accurate and compelling analysis possible. Prioritize REAL user pain points and sentiments from the web research. - -Website: {scraped_data['url']} -Title: {scraped_data['title']} -Meta: {scraped_data['meta_description']} -Headings: {json.dumps(scraped_data['headings'][:15])} - -=== WEBSITE CONTENT === -{scraped_data['main_content'][:6000]} - -=== ADDITIONAL PAGES === -{scraped_data['additional_pages'][:8000]} -{research_context} - -Return a JSON object: -{{ - "product_name": "Name of the SaaS", - "one_liner": "One-sentence description", - "target_audience": ["audience 1", "audience 2", "audience 3"], - "pain_points": [ - {{"pain": "specific pain point (from real user feedback if available)", "intensity": "high/medium/low", "emotional_trigger": "frustration/fear/time-waste/money-loss/overwhelm", "source": "website/user-reviews/reddit/general"}} - ], - "key_features": ["feature 1", "feature 2", "feature 3"], - "unique_selling_points": ["usp 1", "usp 2"], - "competitors": [ - {{"name": "competitor", "comparison": "how they compare"}} - ], - "pricing_model": "freemium/subscription/one-time/usage-based", - "pricing_details": "specific pricing info if found", - "industry": "category", - "user_sentiment_summary": "what real users think overall", - "emotional_hooks": [ - "Stop wasting X hours on...", - "Your competitors are already using...", - "I wish I knew about this sooner..." - ], - "transformation_story": "Before (with real pain) → After (with product) narrative", - "viral_angles": [ - {{"angle": "description", "platform": "tiktok/instagram/both", "style": "ugc/educational/shock/story", "why_viral": "reason this angle works"}} - ] -}} - -IMPORTANT: Use REAL pain points from user reviews when available. Real frustrations make the best UGC content. 
-Include 5-8 pain points, 4-6 emotional hooks, and 4+ viral angles.""" - - response = client.models.generate_content( - model=GEMINI_MODEL, - contents=[prompt], - config=types.GenerateContentConfig(response_mime_type="application/json"), - ) - - raw = response.text - if not raw: - raise Exception("Gemini returned empty response for SaaS analysis") - - text = raw.strip() - if text.startswith("```"): - text = re.sub(r"^```(?:json)?\n?", "", text) - text = re.sub(r"\n?```$", "", text) - - start = text.find("{") - end = text.rfind("}") - if start != -1 and end != -1: - text = text[start : end + 1] - - try: - analysis = json.loads(text) - except json.JSONDecodeError as e: - raise Exception(f"Failed to parse analysis JSON: {e}\nRaw: {text[:500]}") - - # Attach web research sources for reference - if web_research and web_research.get("grounding_sources"): - analysis["_web_sources"] = web_research["grounding_sources"] - - print(f"[SaaSShorts] ✅ Analysis: {analysis.get('product_name', '?')} ({len(analysis.get('pain_points', []))} pain points)") - return analysis - - -def generate_scripts( - analysis: dict, - gemini_key: str, - num_scripts: int = 3, - style: str = "ugc", - language: str = "en", - actor_gender: str = "female", -) -> list: - """Generate video scripts based on SaaS analysis.""" - from google import genai - from google.genai import types - - lang_name = "Spanish" if language == "es" else "English" - print(f"[SaaSShorts] 📝 Generating {num_scripts} scripts ({style}, {lang_name})...") - - client = genai.Client(api_key=gemini_key) - - style_guide = { - "ugc": "Natural, authentic UGC style. Person talking to camera like sharing a discovery with a friend. Casual, genuine.", - "educational": "Educational style. Clear explanations.", - "shock": "Shock/discovery style. Surprising opener.", - "story": "Storytelling style. 
Mini narrative.", - "comparison": "Before/after comparison.", - } - - lang_instructions = "" - if language == "es": - lang_instructions = """ -LANGUAGE: ALL narrations, subtitles, captions, and hashtags MUST be in SPANISH (Spain/Latin America). -Use natural casual Spanish like a real person would speak on TikTok. Contractions, slang OK. -Examples of Spanish UGC hooks: "Tío, no me puedo creer que nadie me haya dicho esto antes...", "Si usas Excel para esto, necesitas ver esto YA", "Os voy a enseñar algo que me ha cambiado la vida..." -""" - else: - lang_instructions = """ -LANGUAGE: ALL narrations, subtitles, captions, and hashtags MUST be in ENGLISH. -Use natural casual American English like a real person on TikTok. Contractions, slang OK. -Examples of English UGC hooks: "Okay so I just found this tool and...", "Stop doing this manually, there's a better way", "I can't believe nobody told me about this sooner..." -""" - - prompt = f"""You are a viral short-form video scriptwriter for TikTok/Instagram Reels. -Generate {num_scripts} video scripts to promote this product/business. -{lang_instructions} -PRODUCT ANALYSIS: -{json.dumps(analysis, indent=2)} - -STYLE: {style_guide.get(style, style_guide['ugc'])} - -Each script MUST be 20-25 seconds total. NEVER longer than 25 seconds. - -YOU MUST USE EXACTLY THIS 5-SEGMENT STRUCTURE. NO EXCEPTIONS: -1. HOOK (0-5s): type="hook", visual="actor_talking", broll_prompt=null — Avatar says a punchy hook. -2. B-ROLL 1 (5-9s): type="problem", visual="broll", broll_prompt="..." (REQUIRED) — Visual of the problem. -3. BODY (9-16s): type="solution", visual="actor_talking", broll_prompt=null — Avatar presents the solution. -4. B-ROLL 2 (16-21s): type="demo", visual="broll", broll_prompt="..." (REQUIRED) — Visual of the product. -5. CTA (21-25s): type="cta", visual="actor_talking", broll_prompt=null — Avatar says CTA with link in bio. - -CRITICAL — READ CAREFULLY: -- EXACTLY 5 segments. Not 3, not 4, not 6. FIVE. 
-- Segments 2 and 4 MUST have visual="broll" and a non-null broll_prompt string. -- Segments 1, 3, 5 MUST have visual="actor_talking" and broll_prompt=null. -- duration_seconds MUST be between 20 and 25. -- full_narration = all narration text joined together. - -Return a JSON array: -[ - {{ - "title": "Short internal title", - "style": "{style}", - "duration_seconds": 23, - "target_platform": "tiktok", - "hook_text": "Hook overlay text (2-5 words max)", - "segments": [ - {{ - "type": "hook", - "start": 0, - "end": 5, - "narration": "Punchy hook the actor says", - "visual": "actor_talking", - "broll_prompt": null, - "emotion": "excited", - "subtitle_text": "Hook phrase" - }}, - {{ - "type": "problem", - "start": 5, - "end": 9, - "narration": "Voiceover describing the pain point", - "visual": "broll", - "broll_prompt": "REQUIRED: visual of the problem, e.g. person frustrated at laptop, cluttered spreadsheet on screen", - "emotion": "frustrated", - "subtitle_text": "Pain phrase" - }}, - {{ - "type": "solution", - "start": 9, - "end": 16, - "narration": "Actor introduces the product naturally", - "visual": "actor_talking", - "broll_prompt": null, - "emotion": "confident", - "subtitle_text": "Solution phrase" - }}, - {{ - "type": "demo", - "start": 16, - "end": 21, - "narration": "Voiceover showing the product in action", - "visual": "broll", - "broll_prompt": "REQUIRED: visual of the product/result, e.g. clean dashboard with metrics, modern app interface", - "emotion": "excited", - "subtitle_text": "Result phrase" - }}, - {{ - "type": "cta", - "start": 21, - "end": 23, - "narration": "Short CTA mentioning link in bio", - "visual": "actor_talking", - "broll_prompt": null, - "emotion": "confident", - "subtitle_text": "Link in bio" - }} - ], - "full_narration": "All narration text joined (only actor_talking segments)", - "actor_description": "Specific person description: age, gender, ethnicity, hair style, clothing. 
Casual everyday look.", - "hashtags": ["#saas", "#productivity", "#techtools"], - "caption": "Suggested Instagram/TikTok caption" - }} -] - -RULES: -- EXACTLY 5 segments in order: actor, broll, actor, broll, actor -- EXACTLY 2 broll segments with detailed broll_prompt (NOT null) -- full_narration = ALL narration text (both actor and broll voiceover segments joined) -- Total duration MUST be 18-22 seconds, never more -- Keep narrations punchy, conversational, with contractions -- Actor descriptions: casual, real-person look (NOT model/influencer) -- B-roll prompts: cinematic, specific, detailed visual descriptions -- Each script should use a different pain point / angle -- Vary actor demographics across scripts -- CTA MUST always mention "link in bio" / "enlace en la bio". Examples: "Link in bio, go try it", "Check the link in my bio", "El enlace está en la bio, probadlo" -- Write ALL text in {lang_name} -- Actor gender: {actor_gender}. ALL actor_description fields MUST describe a {actor_gender} person. Use diverse ages/ethnicities across scripts. -- IMPORTANT: actor_description MUST ALWAYS be in ENGLISH regardless of script language. Only describe physical appearance: age, gender, ethnicity, hair, clothing. NO actions, NO background, NO scene description. -- Actors must look European, attractive but natural, slightly nerdy/tech vibe. Vary across: blonde, brunette, redhead. Ages 22-35. -- If female: casual summer look (tank top, camisole, simple tee). If male: casual tee or hoodie. 
-- Example female: "a 26 year old attractive european woman, light brown wavy hair, wearing a white tank top, natural minimal makeup, friendly face" -- Example male: "a 29 year old european man, short dark hair, light stubble, wearing a navy t-shirt, smart casual look" """ - - response = client.models.generate_content( - model=GEMINI_MODEL, - contents=[prompt], - config=types.GenerateContentConfig( - response_mime_type="application/json", - max_output_tokens=8192, - ), - ) - - raw = response.text - if not raw: - raise Exception("Gemini returned empty response for script generation") - - text = raw.strip() - if text.startswith("```"): - text = re.sub(r"^```(?:json)?\n?", "", text) - text = re.sub(r"\n?```$", "", text) - - start = text.find("[") - end = text.rfind("]") - if start != -1 and end != -1: - text = text[start : end + 1] - - try: - scripts = json.loads(text) - except json.JSONDecodeError as e: - raise Exception(f"Failed to parse scripts JSON: {e}\nRaw: {text[:500]}") - - print(f"[SaaSShorts] ✅ Generated {len(scripts)} scripts") - return scripts - - -# ═══════════════════════════════════════════════════════════════════════ -# Phase 2: Asset Generation -# ═══════════════════════════════════════════════════════════════════════ - -def _fal_run(model_id: str, input_data: dict, fal_key: str, timeout: int = 600) -> dict: - """ - Submit a job to fal.ai queue, poll for completion, return result. - Uses the URLs returned by the submit response (as per fal.ai docs). 
- """ - headers = { - "Authorization": f"Key {fal_key}", - "Content-Type": "application/json", - } - - # ── Step 1: Submit to queue ── - submit_url = f"{FAL_QUEUE_BASE}/{model_id}" - print(f"[fal.ai] Submitting to {submit_url}...") - - with httpx.Client(timeout=120.0) as client: - resp = client.post(submit_url, headers=headers, json=input_data) - - if resp.status_code >= 400: - print(f"[fal.ai] Submit error: {resp.text[:500]}") - raise Exception(f"fal.ai error ({resp.status_code}): {resp.text[:300]}") - - try: - submit_data = resp.json() - except json.JSONDecodeError: - raise Exception(f"fal.ai invalid JSON: {resp.text[:300]}") - - request_id = submit_data.get("request_id") - if not request_id: - # Synchronous result (no queue) - return submit_data - - # Use the URLs from the submit response (guaranteed correct per docs) - status_url = submit_data.get("status_url", f"{FAL_QUEUE_BASE}/{model_id}/requests/{request_id}/status") - response_url = submit_data.get("response_url", f"{FAL_QUEUE_BASE}/{model_id}/requests/{request_id}") - - print(f"[fal.ai] Queued: {request_id}") - print(f"[fal.ai] Status URL: {status_url}") - - # ── Step 2: Poll for completion ── - poll_headers = {"Authorization": f"Key {fal_key}"} - start = time.time() - - while time.time() - start < timeout: - elapsed = int(time.time() - start) - try: - with httpx.Client(timeout=30.0) as client: - poll_resp = client.get(f"{status_url}?logs=1", headers=poll_headers) - status_data = poll_resp.json() - except Exception as e: - print(f"[fal.ai] Poll error (retrying): {e}") - time.sleep(5) - continue - - status = status_data.get("status", "UNKNOWN") - - if status == "COMPLETED": - print(f"[fal.ai] ✅ Completed in {elapsed}s! 
Fetching result...") - with httpx.Client(timeout=120.0) as client: - result_resp = client.get(response_url, headers=poll_headers) - return result_resp.json() - - elif status in ("FAILED", "CANCELLED"): - error = status_data.get("error", "unknown error") - raise Exception(f"fal.ai job {status}: {error}") - - # Log progress - queue_pos = status_data.get("queue_position", "") - pos_info = f" (pos: {queue_pos})" if queue_pos != "" else "" - print(f"[fal.ai] {model_id}: {status}{pos_info} ({elapsed}s)") - time.sleep(5) - - raise Exception(f"fal.ai job timed out after {timeout}s for {model_id}") - - -def _fal_upload_file(file_path: str, fal_key: str) -> str: - """Upload a local file to fal.ai CDN storage and return public URL.""" - headers = {"Authorization": f"Key {fal_key}"} - - filename = os.path.basename(file_path) - ext = os.path.splitext(filename)[1].lower() - content_types = { - ".png": "image/png", - ".jpg": "image/jpeg", - ".jpeg": "image/jpeg", - ".mp3": "audio/mpeg", - ".wav": "audio/wav", - ".mp4": "video/mp4", - ".webp": "image/webp", - } - content_type = content_types.get(ext, "application/octet-stream") - - # Initiate upload - with httpx.Client(timeout=30.0) as client: - resp = client.post( - "https://rest.alpha.fal.ai/storage/upload/initiate", - headers={**headers, "Content-Type": "application/json"}, - json={"file_name": filename, "content_type": content_type}, - ) - resp.raise_for_status() - upload_info = resp.json() - - upload_url = upload_info["upload_url"] - file_url = upload_info["file_url"] - - # Upload file content - with open(file_path, "rb") as f: - file_bytes = f.read() - - with httpx.Client(timeout=120.0) as client: - resp = client.put( - upload_url, - content=file_bytes, - headers={"Content-Type": content_type}, - ) - resp.raise_for_status() - - print(f"[fal.ai] Uploaded {filename} → {file_url}") - return file_url - - -def generate_actor_images( - description: str, fal_key: str, output_dir: str, title_slug: str, num_options: int = 3, - 
product_description: str = None, -) -> List[str]: - """Generate multiple hyper-realistic actor portrait options using Flux 2 Pro.""" - print(f"[SaaSShorts] 🎨 Generating {num_options} actor image options (Flux 2 Pro)...") - - # Clean description: strip scene/actions, keep only physical appearance - clean_desc = description - for remove in ["hablando", "talking", "sentad", "sitting", "desde", "from", "con una", "with a", "detrás", "behind"]: - if remove in clean_desc.lower(): - idx = clean_desc.lower().find(remove) - if idx > 10: - clean_desc = clean_desc[:idx].rstrip(" ,.") - - import random - img_num = random.randint(1000, 9999) - - if product_description: - prompt = f"""IMG_{img_num}.jpg Raw candid selfie of {clean_desc}, casually holding {product_description}, showing it to the camera with a natural smile. Product clearly visible in hand. Casual and real, not an ad. Low quality front camera, soft room lighting. Reddit selfie.""" - else: - prompt = f"""IMG_{img_num}.jpg Raw candid selfie of {clean_desc}, sitting at their desk at home, looking at camera with a relaxed natural smile. Headphones around neck, monitor glow behind them. Not posed, casual and real. Low quality front camera, soft room lighting. 
Reddit selfie.""" - - print(f"[SaaSShorts] Prompt: {prompt[:120]}...{' (with product)' if product_description else ''}") - - paths = [] - # Flux 2 Pro — #1 for photorealistic faces - def _gen_one(i): - result = _fal_run( - "fal-ai/flux-2-pro", - { - "prompt": prompt, - "image_size": "portrait_4_3", - "safety_tolerance": 5, - "seed": random.randint(0, 999999), - }, - fal_key, - timeout=300, - ) - images = result.get("images") or result.get("output", []) - if not images: - raise Exception(f"No images in actor result: {list(result.keys())}") - img_url = images[0]["url"] if isinstance(images[0], dict) else images[0] - img_path = os.path.join(output_dir, f"{title_slug}_actor_option_{i}.png") - with httpx.Client(timeout=60.0) as client: - img_resp = client.get(img_url) - with open(img_path, "wb") as f: - f.write(img_resp.content) - print(f"[SaaSShorts] ✅ Actor option {i+1}: {img_path}") - return img_path - - with ThreadPoolExecutor(max_workers=num_options) as executor: - futures = [executor.submit(_gen_one, i) for i in range(num_options)] - for future in as_completed(futures): - paths.append(future.result()) - - return sorted(paths) - - paths = [] - for i, img in enumerate(result.get("images", [])): - img_path = os.path.join(output_dir, f"{title_slug}_actor_option_{i}.png") - with httpx.Client(timeout=60.0) as client: - img_resp = client.get(img["url"]) - with open(img_path, "wb") as f: - f.write(img_resp.content) - paths.append(img_path) - print(f"[SaaSShorts] ✅ Actor option {i+1}: {img_path}") - - return paths - - -def generate_actor_image( - description: str, fal_key: str, output_path: str -) -> str: - """Generate a single actor image using Recraft V4.""" - output_dir = os.path.dirname(output_path) - title_slug = os.path.basename(output_path).replace("_actor.png", "") - paths = generate_actor_images(description, fal_key, output_dir, title_slug, num_options=1) - if paths: - import shutil - shutil.move(paths[0], output_path) - return output_path - - -def 
generate_voiceover( - text: str, - elevenlabs_key: str, - output_path: str, - voice_id: str = "21m00Tcm4TlvDq8ikWAM", -) -> str: - """Generate voiceover audio using ElevenLabs TTS.""" - print(f"[SaaSShorts] 🎙️ Generating voiceover ({len(text)} chars)...") - - url = f"{ELEVENLABS_API_BASE}/text-to-speech/{voice_id}" - - headers = { - "xi-api-key": elevenlabs_key, - "Content-Type": "application/json", - } - - body = { - "text": text, - "model_id": "eleven_multilingual_v2", - "voice_settings": { - "stability": 0.5, - "similarity_boost": 0.75, - "style": 0.4, - "use_speaker_boost": True, - }, - } - - with httpx.Client(timeout=120.0) as client: - resp = client.post(url, headers=headers, json=body) - if resp.status_code != 200: - raise Exception(f"ElevenLabs TTS error ({resp.status_code}): {resp.text}") - - with open(output_path, "wb") as f: - f.write(resp.content) - - print(f"[SaaSShorts] ✅ Voiceover: {output_path}") - return output_path - - -def get_elevenlabs_voices(elevenlabs_key: str) -> list: - """Fetch available voices from ElevenLabs.""" - url = f"{ELEVENLABS_API_BASE}/voices" - headers = {"xi-api-key": elevenlabs_key} - - with httpx.Client(timeout=15.0) as client: - resp = client.get(url, headers=headers) - if resp.status_code != 200: - return [] - data = resp.json() - - voices = [] - for v in data.get("voices", []): - voices.append({ - "voice_id": v["voice_id"], - "name": v["name"], - "category": v.get("category", ""), - "labels": v.get("labels", {}), - "preview_url": v.get("preview_url", ""), - }) - - return voices - - -# ═══════════════════════════════════════════════════════════════════════ -# Phase 3: Video Generation -# ═══════════════════════════════════════════════════════════════════════ - -def generate_talking_head( - image_path: str, - audio_path: str, - fal_key: str, - output_path: str, -) -> str: - """Generate talking head video using Kling Avatar v2 Standard on fal.ai.""" - print(f"[SaaSShorts] 🗣️ Generating talking head (Kling Avatar v2)...") - - 
# Upload image and audio to fal.ai CDN - image_url = _fal_upload_file(image_path, fal_key) - audio_url = _fal_upload_file(audio_path, fal_key) - - result = _fal_run( - "fal-ai/kling-video/ai-avatar/v2/standard", - { - "image_url": image_url, - "audio_url": audio_url, - "prompt": ( - "Natural UGC creator talking to camera. Expressive and energetic. " - "Subtle hand gestures to emphasize points. Slight head movements and nods. " - "Occasional leaning forward for emphasis. Relaxed shoulders, casual vibe. " - "Maintain eye contact with camera. Natural blinking and micro-expressions." - ), - }, - fal_key, - timeout=600, - ) - - video_url = result["video"]["url"] - - # Download video - with httpx.Client(timeout=180.0) as client: - vid_resp = client.get(video_url) - with open(output_path, "wb") as f: - f.write(vid_resp.content) - - print(f"[SaaSShorts] ✅ Talking head: {output_path}") - return output_path - - -def generate_talking_head_lowcost( - image_path: str, - audio_path: str, - fal_key: str, - output_path: str, -) -> str: - """ - Low-cost talking head: Hailuo 2.3 Fast img2video → VEED Lipsync. - ~$0.39 vs ~$1.69 for Kling Avatar v2. - """ - print(f"[SaaSShorts] 🗣️ Generating talking head (Low Cost: Hailuo + VEED Lipsync)...") - - # Step 1: Generate 6s video from image using MiniMax Hailuo 2.3 Fast ($0.19) - # Cache the Hailuo clip so retries don't re-generate it - hailuo_cache_path = output_path.replace(".mp4", "_hailuo_cache.mp4") - - if os.path.exists(hailuo_cache_path) and os.path.getsize(hailuo_cache_path) > 0: - print(f"[SaaSShorts] Hailuo clip cached, skipping generation.") - hailuo_video_url = _fal_upload_file(hailuo_cache_path, fal_key) - else: - image_url = _fal_upload_file(image_path, fal_key) - - hailuo_result = _fal_run( - "fal-ai/minimax/hailuo-2.3-fast/standard/image-to-video", - { - "image_url": image_url, - "prompt": ( - "Person talking to camera, subtle head nods and natural micro-expressions. " - "Gentle head movement, slight shoulder sway. 
Eye contact with camera. " - "Natural blinking. Soft ambient lighting. Smooth cinematic motion." - ), - }, - fal_key, - timeout=300, - ) - - print(f"[SaaSShorts] Hailuo response keys: {list(hailuo_result.keys())}") - if "video" in hailuo_result: - hailuo_video_url = hailuo_result["video"]["url"] if isinstance(hailuo_result["video"], dict) else hailuo_result["video"] - elif "video_url" in hailuo_result: - hailuo_video_url = hailuo_result["video_url"] - elif "output" in hailuo_result: - hailuo_video_url = hailuo_result["output"]["url"] if isinstance(hailuo_result["output"], dict) else hailuo_result["output"] - else: - raise Exception(f"No video in Hailuo result: {hailuo_result}") - - # Save Hailuo clip locally for retry cache - with httpx.Client(timeout=180.0) as client: - vid_resp = client.get(hailuo_video_url) - with open(hailuo_cache_path, "wb") as f: - f.write(vid_resp.content) - - print(f"[SaaSShorts] Hailuo 2.3 Fast 6s clip ready (cached for retry).") - - # Step 2: Upload audio for lip-sync - audio_url = _fal_upload_file(audio_path, fal_key) - - # Step 3: VEED Lipsync — high quality lip-sync with loop ($0.20 for 30s) - lipsync_result = _fal_run( - "veed/lipsync", - { - "video_url": hailuo_video_url, - "audio_url": audio_url, - }, - fal_key, - timeout=900, - ) - - print(f"[SaaSShorts] VEED Lipsync response keys: {list(lipsync_result.keys())}") - if "video" in lipsync_result: - lipsync_video_url = lipsync_result["video"]["url"] if isinstance(lipsync_result["video"], dict) else lipsync_result["video"] - else: - raise Exception(f"No video in VEED Lipsync result: {lipsync_result}") - - with httpx.Client(timeout=180.0) as client: - vid_resp = client.get(lipsync_video_url) - with open(output_path, "wb") as f: - f.write(vid_resp.content) - - print(f"[SaaSShorts] ✅ Talking head (low cost): {output_path}") - return output_path - - -def generate_broll( - prompt: str, fal_key: str, output_path: str, duration: str = "5" -) -> str: - """ - Generate b-roll: Recraft V4 image + 
Ken Burns zoom effect via FFmpeg. - """ - print(f"[SaaSShorts] 🎬 Generating b-roll image + Ken Burns effect...") - - dur_secs = int(duration) - img_path = output_path.replace(".mp4", "_img.png") - - # Step 1: Generate a high-quality still image with Flux 2 Pro - result = _fal_run( - "fal-ai/flux-2-pro", - { - "prompt": f"{prompt}. Cinematic, shallow depth of field, professional photography.", - "image_size": "portrait_4_3", - "safety_tolerance": 5, - }, - fal_key, - timeout=300, - ) - - # Flux 2 Pro returns images in "images" or "output" key - images = result.get("images") or result.get("output", []) - if not images: - raise Exception(f"No images in b-roll result: {list(result.keys())}") - img_url = images[0]["url"] if isinstance(images[0], dict) else images[0] - - with httpx.Client(timeout=60.0) as client: - img_resp = client.get(img_url) - with open(img_path, "wb") as f: - f.write(img_resp.content) - - # Step 2: Ken Burns effect — slow zoom in with slight pan - fps = 30 - total_frames = dur_secs * fps - # Zoom from 1.0x to 1.15x over duration (subtle, cinematic) - zoompan_filter = ( - f"scale=2160:3840," - f"zoompan=z='1+0.15*on/{total_frames}':" - f"x='iw/2-(iw/zoom/2)+10*on/{total_frames}':" - f"y='ih/2-(ih/zoom/2)':" - f"d={total_frames}:s=1080x1920:fps={fps}," - f"setsar=1" - ) - cmd = [ - "ffmpeg", "-y", - "-loop", "1", "-i", img_path, # Input 0: image - "-f", "lavfi", "-i", "anullsrc=r=44100:cl=stereo", # Input 1: silent audio - "-vf", zoompan_filter, - "-t", str(dur_secs), - "-map", "0:v", "-map", "1:a", - "-c:v", "libx264", "-preset", "fast", "-crf", "22", - "-pix_fmt", "yuv420p", - "-c:a", "aac", "-b:a", "128k", - "-shortest", - output_path, - ] - - subprocess.run(cmd, check=True, capture_output=True) - - # Cleanup temp image - if os.path.exists(img_path): - os.remove(img_path) - - print(f"[SaaSShorts] ✅ B-roll (Ken Burns): {output_path}") - return output_path - - -# ═══════════════════════════════════════════════════════════════════════ -# Phase 4: 
Compositing (FFmpeg) -# ═══════════════════════════════════════════════════════════════════════ - -def _get_media_duration(path: str) -> float: - """Get duration of a media file using ffprobe.""" - cmd = [ - "ffprobe", "-v", "error", - "-show_entries", "format=duration", - "-of", "default=noprint_wrappers=1:nokey=1", - path, - ] - try: - result = subprocess.run(cmd, capture_output=True, text=True) - output = result.stdout.strip() - if output: - return float(output) - except Exception as e: - print(f"[SaaSShorts] ⚠️ ffprobe failed for {path}: {e}") - return 30.0 # Fallback to 30s estimate - - -def _format_ass_time(seconds: float) -> str: - """Format time for ASS subtitle format: H:MM:SS.cc""" - h = int(seconds // 3600) - m = int((seconds % 3600) // 60) - s = int(seconds % 60) - cs = int((seconds - int(seconds)) * 100) - return f"{h}:{m:02d}:{s:02d}.{cs:02d}" - - -def transcribe_audio_for_subs(audio_path: str) -> list: - """ - Transcribe audio with word-level timestamps using faster-whisper. - Returns list of {"word": str, "start": float, "end": float}. - """ - from faster_whisper import WhisperModel - - print(f"[SaaSShorts] 🎙️ Transcribing audio for subtitles...") - model = WhisperModel("base", device="cpu", compute_type="int8") - segments, info = model.transcribe(audio_path, word_timestamps=True) - - words = [] - for segment in segments: - if segment.words: - for w in segment.words: - words.append({ - "word": w.word.strip(), - "start": w.start, - "end": w.end, - }) - - print(f"[SaaSShorts] ✅ Transcribed {len(words)} words") - return words - - -def generate_tiktok_subs(audio_path: str, output_path: str, max_words: int = 3) -> str: - """ - Generate TikTok-style ASS subtitles from audio using Whisper transcription. - - Style: Big bold centered text, 1-3 words at a time, white with black outline. - Matches actual spoken words with precise timestamps. 
- """ - words = transcribe_audio_for_subs(audio_path) - if not words: - # Fallback: empty subtitle file - with open(output_path, "w") as f: - f.write("") - return output_path - - # Group words into chunks of max_words - chunks = [] - for i in range(0, len(words), max_words): - group = words[i : i + max_words] - text = " ".join(w["word"] for w in group).upper() - start = group[0]["start"] - end = group[-1]["end"] - chunks.append({"text": text, "start": start, "end": end}) - - # Build ASS file with TikTok style - ass_content = """[Script Info] -Title: TikTok Style Subs -ScriptType: v4.00+ -PlayResX: 1080 -PlayResY: 1920 -WrapStyle: 0 - -[V4+ Styles] -Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding -Style: TikTok,Arial Black,90,&H00FFFFFF,&H000000FF,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,4,0,2,40,40,120,1 - -[Events] -Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text -""" - - for chunk in chunks: - start = _format_ass_time(chunk["start"]) - end = _format_ass_time(chunk["end"]) - text = chunk["text"].replace("\n", "\\N") - ass_content += f"Dialogue: 0,{start},{end},TikTok,,0,0,0,,{text}\n" - - with open(output_path, "w", encoding="utf-8") as f: - f.write(ass_content) - - print(f"[SaaSShorts] ✅ TikTok subs: {len(chunks)} captions from {len(words)} words") - return output_path - - -def generate_srt_from_script(segments: list, output_path: str) -> str: - """Fallback: generate basic SRT from script segments (used if no audio available).""" - srt_content = "" - index = 1 - for seg in segments: - text = seg.get("subtitle_text") or seg.get("narration", "") - if not text: - continue - words = text.split() - chunk_size = 3 - start_time = seg["start"] - end_time = seg["end"] - duration = end_time - start_time - chunks = [words[i : i + chunk_size] for i in 
range(0, len(words), chunk_size)] - chunk_dur = duration / max(len(chunks), 1) - for i, chunk in enumerate(chunks): - cs = start_time + i * chunk_dur - ce = min(start_time + (i + 1) * chunk_dur, end_time) - h, m, s, ms = int(cs//3600), int((cs%3600)//60), int(cs%60), int((cs-int(cs))*1000) - h2, m2, s2, ms2 = int(ce//3600), int((ce%3600)//60), int(ce%60), int((ce-int(ce))*1000) - srt_content += f"{index}\n{h:02d}:{m:02d}:{s:02d},{ms:03d} --> {h2:02d}:{m2:02d}:{s2:02d},{ms2:03d}\n{' '.join(chunk).upper()}\n\n" - index += 1 - with open(output_path, "w", encoding="utf-8") as f: - f.write(srt_content) - return output_path - - -def composite_video( - talking_head_path: str, - broll_clips: List[Dict], - srt_path: str, - hook_text: str, - output_path: str, -) -> str: - """ - Composite talking head + b-roll inserts + subtitles into final video. - - broll_clips: [{"path": "/path/to/clip.mp4", "start": 12, "end": 17}] - """ - print(f"[SaaSShorts] 🎞️ Compositing final video...") - - # Determine subtitle filter based on file type - safe_sub = srt_path.replace("\\", "/").replace(":", "\\:") - if srt_path.endswith(".ass"): - # ASS has styles embedded — use ass filter directly - sub_filter = f"ass='{safe_sub}'" - else: - # SRT fallback with TikTok-ish styling - sub_style = ( - "Alignment=2,Fontname=Arial Black,Fontsize=24,PrimaryColour=&H00FFFFFF," - "OutlineColour=&H00000000,BorderStyle=1,Outline=4,Shadow=0,MarginV=120,Bold=-1" - ) - sub_filter = f"subtitles='{safe_sub}':force_style='{sub_style}'" - - if not broll_clips: - # Simple: talking head + subtitles only - cmd = [ - "ffmpeg", "-y", - "-i", talking_head_path, - "-vf", sub_filter, - "-c:v", "libx264", "-preset", "fast", "-crf", "22", - "-c:a", "aac", "-b:a", "128k", - output_path, - ] - subprocess.run(cmd, check=True) - print(f"[SaaSShorts] ✅ Final video (simple): {output_path}") - return output_path - - # Complex: talking head with b-roll inserts - th_duration = _get_media_duration(talking_head_path) - sorted_broll = 
sorted(broll_clips, key=lambda x: x["start"]) - - # Get actual b-roll durations and limit segment lengths - broll_durations = {} - for i, clip in enumerate(sorted_broll): - broll_durations[i] = _get_media_duration(clip["path"]) - print(f"[SaaSShorts] B-roll {i} actual duration: {broll_durations[i]:.1f}s") - - # Build segment list — limit b-roll segments to actual clip duration - segments = [] - prev_end = 0.0 - - for i, clip in enumerate(sorted_broll): - bstart = clip["start"] - actual_dur = broll_durations[i] - # B-roll segment can't be longer than the actual clip - bend = min(clip["end"], bstart + actual_dur) - - if prev_end < bstart: - segments.append({"type": "th", "start": prev_end, "end": bstart}) - - segments.append({ - "type": "broll", - "index": i, - "start": bstart, - "end": bend, - "duration": bend - bstart, - }) - prev_end = bend - - if prev_end < th_duration: - segments.append({"type": "th", "start": prev_end, "end": th_duration}) - - # Build FFmpeg filter_complex - inputs = ["-i", talking_head_path] - for clip in sorted_broll: - inputs.extend(["-i", clip["path"]]) - - filter_parts = [] - concat_parts = [] - - # Normalize all segments to same resolution and fps for concat - norm = "scale=1080:1920:force_original_aspect_ratio=decrease,pad=1080:1920:(ow-iw)/2:(oh-ih)/2,fps=30,setsar=1" - - for j, seg in enumerate(segments): - if seg["type"] == "th": - filter_parts.append( - f"[0:v]trim=start={seg['start']:.3f}:end={seg['end']:.3f},setpts=PTS-STARTPTS,{norm}[tv{j}]" - ) - filter_parts.append( - f"[0:a]atrim=start={seg['start']:.3f}:end={seg['end']:.3f},asetpts=PTS-STARTPTS[ta{j}]" - ) - concat_parts.append(f"[tv{j}][ta{j}]") - else: - idx = seg["index"] + 1 - dur = seg["duration"] - filter_parts.append( - f"[{idx}:v]trim=start=0:end={dur:.3f},setpts=PTS-STARTPTS,{norm}[bv{j}]" - ) - filter_parts.append( - f"[0:a]atrim=start={seg['start']:.3f}:end={seg['end']:.3f},asetpts=PTS-STARTPTS[ba{j}]" - ) - concat_parts.append(f"[bv{j}][ba{j}]") - - n = 
len(segments) - filter_parts.append( - f"{''.join(concat_parts)}concat=n={n}:v=1:a=1[outv][outa]" - ) - filter_parts.append( - f"[outv]{sub_filter}[finalv]" - ) - - filter_str = ";".join(filter_parts) - - cmd = [ - "ffmpeg", "-y", - *inputs, - "-filter_complex", filter_str, - "-map", "[finalv]", - "-map", "[outa]", - "-c:v", "libx264", "-preset", "fast", "-crf", "22", - "-c:a", "aac", "-b:a", "128k", - output_path, - ] - - subprocess.run(cmd, check=True) - print(f"[SaaSShorts] ✅ Final video (composite): {output_path}") - return output_path - - -# ═══════════════════════════════════════════════════════════════════════ -# Orchestrator: Full Pipeline -# ═══════════════════════════════════════════════════════════════════════ - -def generate_full_video( - script: dict, - config: dict, - output_dir: str, - log: Callable[[str], None] = print, -) -> dict: - """ - Full SaaSShorts video generation pipeline. - - Args: - script: A single script object from generate_scripts() - config: { - "fal_key": str, - "elevenlabs_key": str, - "voice_id": str (optional), - "actor_description": str (optional, overrides script), - } - output_dir: Directory to write output files - log: Callback for progress logging - - Returns: - {"video_path": str, "srt_path": str, "actor_image": str, "cost_estimate": dict} - """ - os.makedirs(output_dir, exist_ok=True) - - fal_key = config["fal_key"] - elevenlabs_key = config["elevenlabs_key"] - voice_id = config.get("voice_id", "21m00Tcm4TlvDq8ikWAM") - actor_desc = config.get("actor_description") or script.get("actor_description", "a young professional in their late 20s, wearing a casual modern outfit, clean background") - - title_slug = re.sub(r"[^a-z0-9]+", "_", script.get("title", "video").lower())[:30] - - # Paths - actor_img = os.path.join(output_dir, f"{title_slug}_actor.png") - audio_path = os.path.join(output_dir, f"{title_slug}_voice.mp3") - talking_head = os.path.join(output_dir, f"{title_slug}_head.mp4") - srt_path = os.path.join(output_dir, 
f"{title_slug}_subs.ass") - final_path = os.path.join(output_dir, f"{title_slug}_final.mp4") - - full_narration = script.get("full_narration", "") - if not full_narration: - full_narration = " ".join( - seg.get("narration", "") for seg in script.get("segments", []) - ) - - def _exists(path): - return os.path.exists(path) and os.path.getsize(path) > 0 - - # ── Step 1 & 2: Generate actor image + voiceover in parallel ── - # If user pre-selected an actor image, copy it - selected_actor = config.get("selected_actor_path") - if selected_actor and os.path.exists(selected_actor) and not _exists(actor_img): - import shutil - shutil.copy2(selected_actor, actor_img) - log("[1/6] Using pre-selected actor image.") - - need_img = not _exists(actor_img) - need_voice = not _exists(audio_path) - - if need_img or need_voice: - tasks = [] - if need_img: - tasks.append("actor image") - if need_voice: - tasks.append("voiceover") - log(f"[1/6] Generating {' + '.join(tasks)} (parallel)...") - - with ThreadPoolExecutor(max_workers=2) as executor: - future_img = executor.submit(generate_actor_image, actor_desc, fal_key, actor_img) if need_img else None - future_voice = executor.submit( - generate_voiceover, full_narration, elevenlabs_key, audio_path, voice_id - ) if need_voice else None - - if future_img: - actor_img = future_img.result() - if future_voice: - audio_path = future_voice.result() - - log("[2/6] Actor image and voiceover ready.") - else: - log("[1/6] Actor image and voiceover cached, skipping.") - log("[2/6] ✅ Using cached assets.") - - # ── Step 3: Generate talking head ── - video_mode = config.get("video_mode", "premium") - if not _exists(talking_head): - if video_mode == "lowcost": - log("[3/6] Generating talking head (Low Cost: Hailuo + VEED Lipsync)... This takes 2-5 minutes.") - talking_head = generate_talking_head_lowcost(actor_img, audio_path, fal_key, talking_head) - else: - log("[3/6] Generating talking head video (Kling Avatar v2)... 
This takes 2-5 minutes.") - talking_head = generate_talking_head(actor_img, audio_path, fal_key, talking_head) - log("[3/6] Talking head ready.") - else: - log("[3/6] ✅ Talking head cached, skipping.") - - # ── Step 4: Generate b-roll clips ── - broll_segments = [ - seg for seg in script.get("segments", []) - if seg.get("broll_prompt") and seg.get("visual") == "broll" - ] - - broll_clips = [] - if broll_segments: - # Check which b-roll clips need generating - broll_to_generate = [] - for i, seg in enumerate(broll_segments): - broll_path = os.path.join(output_dir, f"{title_slug}_broll_{i}.mp4") - if _exists(broll_path): - broll_clips.append({ - "path": broll_path, - "start": seg["start"], - "end": seg["end"], - }) - log(f" ✅ B-roll {i} cached, skipping.") - else: - broll_to_generate.append((i, seg, broll_path)) - - if broll_to_generate: - log(f"[4/6] Generating {len(broll_to_generate)} b-roll clips...") - with ThreadPoolExecutor(max_workers=3) as executor: - futures = {} - for i, seg, broll_path in broll_to_generate: - future = executor.submit( - generate_broll, seg["broll_prompt"], fal_key, broll_path - ) - futures[future] = {"seg": seg, "path": broll_path} - - for future in as_completed(futures): - info = futures[future] - try: - path = future.result() - broll_clips.append({ - "path": path, - "start": info["seg"]["start"], - "end": info["seg"]["end"], - }) - log(f" ✅ B-roll clip ready: {os.path.basename(path)}") - except Exception as e: - log(f" ⚠️ B-roll failed (skipping): {e}") - else: - log("[4/6] ✅ All b-roll cached, skipping.") - else: - log("[4/6] No b-roll segments in script, skipping.") - - # ── Step 5: Generate subtitles (from actual audio, not script text) ── - log("[5/6] Transcribing audio and generating TikTok-style subtitles...") - generate_tiktok_subs(audio_path, srt_path, max_words=2) - - # ── Step 6: Composite final video ── - log("[6/6] Compositing final video with FFmpeg...") - hook_text = script.get("hook_text", "") - 
composite_video(talking_head, broll_clips, srt_path, hook_text, final_path) - - log("🎉 Video generation complete!") - - # Cost estimate - audio_duration = _get_media_duration(audio_path) - if video_mode == "lowcost": - cost = { - "actor_image_flux": 0.05, - "voiceover_elevenlabs": round(len(full_narration) * 0.00003, 3), - "hailuo_img2video": 0.19, - "veed_lipsync": 0.20, - "broll_flux": round(len(broll_clips) * 0.05, 2), - "ffmpeg_compositing": 0.00, - } - else: - cost = { - "actor_image_flux": 0.05, - "voiceover_elevenlabs": round(len(full_narration) * 0.00003, 3), - "talking_head_kling": round(audio_duration * 0.056, 2), - "broll_kling": round(len(broll_clips) * 5 * 0.07, 2), - "ffmpeg_compositing": 0.00, - } - cost["total"] = round(sum(cost.values()), 2) - - return { - "video_path": final_path, - "video_filename": os.path.basename(final_path), - "srt_path": srt_path, - "actor_image": actor_img, - "duration": audio_duration, - "cost_estimate": cost, - } +from openshorts.saas.pipeline import * # noqa: F401,F403 +from openshorts.saas.pipeline import ( # noqa: F401 + scrape_website, + research_saas_online, + analyze_saas, + generate_scripts, + generate_full_video, + generate_actor_images, + generate_actor_image, + generate_voiceover, + get_elevenlabs_voices, + DEFAULT_VOICES, +) From 3169c48c8e42feff4029fd1b14630da9b898aec4 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Tue, 19 May 2026 15:58:27 -0400 Subject: [PATCH 11/43] chore(restructure): add openshorts/app.py re-export for Docker entrypoint Phase 1 step 9 (minimum viable): expose the FastAPI app at ``openshorts.app:app`` so the Dockerfile / docker-compose entrypoint can target the package path. 
The actual route handlers still live in the root-level app.py (2256 lines, 32 routes) during the restructure; the full split into routers (process, editing, subtitles, hooks, translation, thumbnails, saasshorts, social + the future audio/layouts/motion_graphics domains) is intentionally deferred to a follow-up commit to keep this change focused. The plan and ROADMAP track the deferred work. Tests stay 62/62 green. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- openshorts/app.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 openshorts/app.py diff --git a/openshorts/app.py b/openshorts/app.py new file mode 100644 index 00000000..a8a0ece6 --- /dev/null +++ b/openshorts/app.py @@ -0,0 +1,23 @@ +"""FastAPI application entrypoint for the openshorts package. + +This module exposes the FastAPI ``app`` instance used by uvicorn: + + uvicorn openshorts.app:app --host 0.0.0.0 --port 8000 + +The actual route handlers still live in the root-level ``app.py`` during the +restructure. A future commit will split that monolith into the planned router +modules under openshorts/routes/ (process, editing, subtitles, hooks, +translation, thumbnails, saasshorts, audio, layouts, motion_graphics, social). +Until then this module simply re-exports the existing FastAPI instance so the +Dockerfile / docker-compose entrypoint can target the package path. +""" +import os +import sys + +# Make sure the repo root is on sys.path so `import app` resolves to the +# original root-level app.py rather than this package's own openshorts/app.py. 
+_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)) +if _REPO_ROOT not in sys.path: + sys.path.insert(0, _REPO_ROOT) + +from app import app # noqa: E402,F401 From c2204d956e2759c9bea3e6a3e602499befc45aec Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Tue, 19 May 2026 15:59:26 -0400 Subject: [PATCH 12/43] chore(restructure): add openshorts/video/ffmpeg.py wrapper scaffold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1 step 10: introduce the single FFmpeg wrapper module that the plan calls for. The scaffold exposes the helpers needed by the existing call sites (run, probe_resolution, probe_duration, cut, extract_audio, mux_video_audio, overlay_png) plus a build_filter_complex composer that the future motion-graphics compositor and audio mixer will use to batch overlay/eq/amix operations into a single ffmpeg invocation. Migration of every existing ``subprocess.run(['ffmpeg', ...])`` call to this wrapper is deferred — it's incremental per-caller work that benefits from running between commits with the test suite green. The ROADMAP documents the migration as a follow-up. Tests stay 62/62 green. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- openshorts/video/ffmpeg.py | 168 +++++++++++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 openshorts/video/ffmpeg.py diff --git a/openshorts/video/ffmpeg.py b/openshorts/video/ffmpeg.py new file mode 100644 index 00000000..95a7a973 --- /dev/null +++ b/openshorts/video/ffmpeg.py @@ -0,0 +1,168 @@ +"""Single FFmpeg wrapper for the entire codebase. + +Goal: every ``subprocess.run(['ffmpeg', ...])`` call in the project should funnel +through one of these helpers. This makes it possible to: + +1. Inject a global timeout / progress callback / logging hook in one place. +2. 
Build complex ``filter_complex`` chains by composition (used by the future + motion-graphics compositor and the audio mixer). +3. Test the FFmpeg surface by patching this module instead of patching + ``subprocess.run`` globally. + +The scaffold below is intentionally small. Migration of existing call sites +(in app.py, openshorts/video/pipeline.py, openshorts/overlays/*.py, etc.) is +done incrementally in follow-up commits to keep each change small and the +test suite green between commits. See ROADMAP.md for the migration plan. +""" + +import os +import subprocess +from typing import Iterable, List, Optional, Sequence + + +class FFmpegError(RuntimeError): + """Raised when an ffmpeg/ffprobe invocation exits non-zero.""" + + def __init__(self, returncode: int, stderr: bytes, cmd: Sequence[str]): + self.returncode = returncode + self.stderr = stderr + self.cmd = list(cmd) + super().__init__( + f"ffmpeg failed (rc={returncode}): {' '.join(cmd[:6])}... — " + f"{stderr.decode(errors='replace')[:500]}" + ) + + +def run( + args: Sequence[str], + *, + check: bool = True, + capture_output: bool = True, + env: Optional[dict] = None, + timeout: Optional[float] = None, +) -> subprocess.CompletedProcess: + """Invoke a fully-formed ffmpeg/ffprobe command. + + Use this for one-shot ffmpeg invocations (encode, mux, probe). For + multi-input filter graphs, build the args with ``build_filter_complex`` + and pass them through here. + """ + cmd = ["ffmpeg", *args] if not (args and args[0].endswith("ffprobe")) else list(args) + if cmd[0] != "ffmpeg" and not cmd[0].endswith("ffprobe"): + cmd = ["ffmpeg", *cmd] + + # Always force UTF-8 locale; ffmpeg + non-ascii filenames on minimal + # docker images otherwise blow up with UnicodeEncodeError from subprocess. 
+ full_env = os.environ.copy() + if env: + full_env.update(env) + full_env.setdefault("LANG", "C.UTF-8") + full_env.setdefault("LC_ALL", "C.UTF-8") + + result = subprocess.run( + cmd, + check=False, + stdout=subprocess.PIPE if capture_output else None, + stderr=subprocess.PIPE if capture_output else None, + env=full_env, + timeout=timeout, + ) + + if check and result.returncode != 0: + raise FFmpegError(result.returncode, result.stderr or b"", cmd) + + return result + + +def probe_resolution(video_path: str) -> tuple: + """Return ``(width, height)`` for the first video stream of ``video_path``.""" + cmd = [ + "ffprobe", "-v", "error", + "-select_streams", "v:0", + "-show_entries", "stream=width,height", + "-of", "csv=s=x:p=0", + video_path, + ] + result = subprocess.run( + cmd, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env={**os.environ, "LANG": "C.UTF-8"}, + ) + width, height = result.stdout.decode().strip().split("x") + return int(width), int(height) + + +def probe_duration(video_path: str) -> float: + """Return container duration in seconds.""" + cmd = [ + "ffprobe", "-v", "error", + "-show_entries", "format=duration", + "-of", "default=noprint_wrappers=1:nokey=1", + video_path, + ] + result = subprocess.run( + cmd, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + return float(result.stdout.decode().strip()) + + +def cut(input_video: str, output: str, start: float, end: float, + *, crf: int = 18, preset: str = "fast") -> None: + """Cut ``input_video`` to ``[start, end]`` (re-encoded for accuracy).""" + run([ + "-y", + "-ss", str(start), + "-to", str(end), + "-i", input_video, + "-c:v", "libx264", "-crf", str(crf), "-preset", preset, + "-c:a", "aac", + output, + ]) + + +def extract_audio(input_video: str, output_audio: str) -> None: + """Copy the audio stream from ``input_video`` into ``output_audio``.""" + run([ + "-y", "-i", input_video, + "-vn", "-acodec", "copy", + output_audio, + ]) + + +def 
mux_video_audio(video_path: str, audio_path: str, output: str) -> None: + """Combine a video-only file with an audio file (stream copy, no re-encode).""" + run([ + "-y", + "-i", video_path, + "-i", audio_path, + "-c:v", "copy", "-c:a", "copy", + output, + ]) + + +def overlay_png(video_path: str, png_path: str, output: str, + *, x: int, y: int, crf: int = 22) -> None: + """Burn a single PNG overlay onto ``video_path`` at ``(x, y)``.""" + run([ + "-y", + "-i", video_path, + "-i", png_path, + "-filter_complex", f"[0:v][1:v]overlay={x}:{y}", + "-c:a", "copy", + "-c:v", "libx264", "-preset", "fast", "-crf", str(crf), + output, + ]) + + +def build_filter_complex(chains: Iterable[str]) -> str: + """Join multiple filter chains with ``;`` (a standard FFmpeg filter_complex). + + Used by the future motion-graphics compositor and audio mixer to batch + multiple overlay/eq/amix operations into a single ffmpeg invocation. + """ + return ";".join(c for c in chains if c) From 84310c7f7de0f9e8d0c3430047cbb304ac8fb34d Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Tue, 19 May 2026 15:59:59 -0400 Subject: [PATCH 13/43] chore(restructure): Dockerfile CMD points at openshorts.app:app Phase 1 step 11: update the container entrypoint to the new package path. openshorts.app re-exports the FastAPI instance from the root-level app.py (it inserts the repo root on sys.path itself, so no editable install is needed). docker-compose.yml inherits this via the backend service. Tests stay 62/62 green. 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 95bfd1f8..498b6943 100644 --- a/Dockerfile +++ b/Dockerfile @@ -61,4 +61,6 @@ RUN python -c "from ultralytics import YOLO; YOLO('yolov8n.pt')" EXPOSE 8000 # Run FastAPI app -CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"] +# openshorts.app re-exports the FastAPI instance from the root-level app.py +# (it inserts the repo root onto sys.path itself, no pip install -e needed). +CMD ["uvicorn", "openshorts.app:app", "--host", "0.0.0.0", "--port", "8000"] From 6496d69df488a40dbeb439fdc64893daecbeddde Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Tue, 19 May 2026 16:01:06 -0400 Subject: [PATCH 14/43] docs(env): expand .env.example to match what the code actually reads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 3: replace the AWS-only stub with the complete set of env vars the codebase reads via os.getenv: - GEMINI_API_KEY (required — viral clip extractor) - AWS_* (optional — S3 clip/actor/video galleries) - DISABLE_YOUTUBE_URL (gate the YouTube tab) - YOUTUBE_COOKIES (yt-dlp bot-detection workaround) - RENDER_SERVICE_URL (Remotion proxy) - MAX_CONCURRENT_JOBS (asyncio semaphore in job queue) - VITE_API_URL + VITE_ENCRYPTION_KEY (frontend) Documents that ELEVENLABS_API_KEY / UPLOAD_POST_API_KEY / FAL_KEY come from the browser via headers (encrypted in localStorage), not server- side env — they're listed at the bottom as commented hints in case a deployer wants to wire a server default later. Tests stay 62/62 green. 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- .env.example | 68 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 61 insertions(+), 7 deletions(-) diff --git a/.env.example b/.env.example index 7a2e21f7..09e6e132 100644 --- a/.env.example +++ b/.env.example @@ -1,9 +1,63 @@ -# AWS S3 (optional — for clip backup/gallery) -AWS_ACCESS_KEY_ID=your_aws_access_key_here -AWS_SECRET_ACCESS_KEY=your_aws_secret_key_here +# ============================================================================= +# OpenShorts environment configuration +# +# Copy this file to `.env` (gitignored) and fill in the values you need. +# Most API keys are stored encrypted in the browser and sent via headers — they +# do NOT need to be set here unless you want a server-side fallback. +# ============================================================================= + +# --- Required (server-side reads via os.getenv) ----------------------------- + +# Google Gemini API key — used by the viral-clip extractor in main.py. +# https://ai.google.dev/gemini-api/docs/api-key +GEMINI_API_KEY= + +# --- Optional: AWS S3 (clip backup + public gallery) ------------------------ + +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= AWS_REGION=eu-west-3 -AWS_S3_BUCKET=your-bucket-name -AWS_S3_PUBLIC_BUCKET=your-public-bucket-name +AWS_S3_BUCKET= +AWS_S3_PUBLIC_BUCKET= + +# --- Optional: YouTube ingestion -------------------------------------------- + +# Disable the YouTube URL ingest tab entirely (uploads-only mode). +DISABLE_YOUTUBE_URL=false + +# Netscape-format cookies (concatenated into one line) to bypass YouTube's +# bot-detection on server IPs. yt-dlp writes this to /app/cookies.txt at +# container startup. +# YOUTUBE_COOKIES= + +# --- Optional: Remotion render service -------------------------------------- + +# URL of the render-service container (only used by /api/render). 
+RENDER_SERVICE_URL=http://renderer:3100 + +# --- Tuning ----------------------------------------------------------------- + +# Max concurrent video-processing jobs (asyncio semaphore in the job queue). +MAX_CONCURRENT_JOBS=5 + +# ============================================================================= +# Frontend (dashboard/) — Vite reads these at build time +# ============================================================================= + +# Production API URL override (defaults to relative paths in dev). +VITE_API_URL=http://localhost:8000 + +# Optional salt for localStorage API-key encryption. +# VITE_ENCRYPTION_KEY= + +# ============================================================================= +# Client-side keys (stored encrypted in the browser, sent via headers per-call) +# +# These are listed here for reference. The Python code does NOT read them from +# .env — set them in the dashboard UI instead. Listed below in case you want +# a server-side default (would require code changes in app.py to honor them). +# ============================================================================= -# YouTube cookies (optional — paste Netscape-format cookies to bypass bot detection) -# YOUTUBE_COOKIES=... +# ELEVENLABS_API_KEY= +# UPLOAD_POST_API_KEY= +# FAL_KEY= From a32e8e5a9b84ecc9bdbe5fd2004cb9c99432ffe8 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Tue, 19 May 2026 16:02:45 -0400 Subject: [PATCH 15/43] chore(tooling): add CLAUDE.md auto-updater + pre-commit hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 4: enforce the "every Python module ships with a one-line docstring" convention mechanically so CLAUDE.md stays in sync with the codebase rather than relying on advisory adherence. 
- scripts/update_claude_md.py walks openshorts/, parses each module's ast for its docstring + public surface, reads .env.example, and rewrites the three auto-managed sections of CLAUDE.md between marker comments (REPO-MAP, MODULE-MAP, ENV). It exits non-zero with a list of offenders if any module lacks a docstring — that failure mode is what enforces the convention. - scripts/install_hooks.sh: one-liner that runs `pre-commit install`. - .pre-commit-config.yaml: runs the updater on every commit. Since the hook regenerates CLAUDE.md and the resulting changes need to be re-staged, developers should rerun `git add CLAUDE.md && git commit` after a code change touches module structure. CLAUDE.md itself stays untouched in this commit — the actual rewrite with markers in the right place happens in Phase 2. Tests stay 62/62 green. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- .pre-commit-config.yaml | 10 ++ scripts/install_hooks.sh | 20 +++ scripts/update_claude_md.py | 254 ++++++++++++++++++++++++++++++++++++ 3 files changed, 284 insertions(+) create mode 100644 .pre-commit-config.yaml create mode 100755 scripts/install_hooks.sh create mode 100755 scripts/update_claude_md.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..5a0f6709 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,10 @@ +repos: + - repo: local + hooks: + - id: update-claude-md + name: Regenerate CLAUDE.md auto-managed sections + entry: python scripts/update_claude_md.py + language: system + pass_filenames: false + always_run: true + stages: [pre-commit] diff --git a/scripts/install_hooks.sh b/scripts/install_hooks.sh new file mode 100755 index 00000000..23238c8f --- /dev/null +++ b/scripts/install_hooks.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# Installs the pre-commit hook that keeps CLAUDE.md in sync with the codebase. 
+# +# Run once after cloning: bash scripts/install_hooks.sh + +set -euo pipefail + +cd "$(dirname "$0")/.." + +if ! command -v pre-commit > /dev/null 2>&1; then + echo "⚠️ pre-commit not installed." + echo " Install it first: pip install pre-commit" + echo " Then re-run: bash scripts/install_hooks.sh" + exit 1 +fi + +pre-commit install +echo "✅ pre-commit hooks installed." +echo " CLAUDE.md will be regenerated on every commit." +echo " To run manually: python scripts/update_claude_md.py" diff --git a/scripts/update_claude_md.py b/scripts/update_claude_md.py new file mode 100755 index 00000000..4602f3fa --- /dev/null +++ b/scripts/update_claude_md.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python3 +""" +Regenerate the auto-managed sections of CLAUDE.md. + +What this does +============== + +CLAUDE.md is split into hand-written prose and three auto-managed tables. The +auto-managed sections live between marker comments and get rewritten by this +script on every commit (via the pre-commit hook in .pre-commit-config.yaml): + + <!-- AUTO:REPO-MAP:START --> ... <!-- AUTO:REPO-MAP:END --> + <!-- AUTO:MODULE-MAP:START --> ... <!-- AUTO:MODULE-MAP:END --> + <!-- AUTO:ENV:START --> ... <!-- AUTO:ENV:END --> + +The script: +1. Walks the repo and lists top-level folders (REPO-MAP). +2. Parses every openshorts/*.py module via ast, extracting the one-line + docstring + the names of public functions/classes (MODULE-MAP). +3. Reads .env.example and renders the env-vars table (ENV). +4. Locates the markers in CLAUDE.md and rewrites only the content between them. + +It is idempotent: running it twice with no source changes is a no-op. + +Convention enforcement +====================== + +This script exits non-zero (with a list of offenders) if any module under +openshorts/ is missing a module docstring. The pre-commit hook will fail the +commit until the developer adds one. 
This is how the "every module has a +one-liner" convention from CLAUDE.md becomes mechanically enforced — without +this, CLAUDE.md is just an advisory and drifts. + +Usage +===== + + python scripts/update_claude_md.py # rewrite CLAUDE.md in place + python scripts/update_claude_md.py --check # exit non-zero if a rewrite is needed (CI mode) +""" + +from __future__ import annotations + +import ast +import argparse +import os +import re +import sys +from pathlib import Path +from typing import List, Tuple + +REPO_ROOT = Path(__file__).resolve().parent.parent +PACKAGE_ROOT = REPO_ROOT / "openshorts" +CLAUDE_MD = REPO_ROOT / "CLAUDE.md" +ENV_EXAMPLE = REPO_ROOT / ".env.example" + + +# --------------------------------------------------------------------------- +# Marker handling +# --------------------------------------------------------------------------- + +MARKERS = { + "REPO-MAP": ("<!-- AUTO:REPO-MAP:START -->", "<!-- AUTO:REPO-MAP:END -->"), + "MODULE-MAP": ("<!-- AUTO:MODULE-MAP:START -->", "<!-- AUTO:MODULE-MAP:END -->"), + "ENV": ("<!-- AUTO:ENV:START -->", "<!-- AUTO:ENV:END -->"), +} + + +def replace_between(text: str, start: str, end: str, body: str) -> str: + """Replace whatever is between ``start`` and ``end`` markers with ``body``.""" + pattern = re.compile( + re.escape(start) + r".*?" + re.escape(end), + re.DOTALL, + ) + replacement = f"{start}\n{body}\n{end}" + if not pattern.search(text): + # Marker block doesn't exist yet — append it at the end. 
+ return text.rstrip() + "\n\n" + replacement + "\n" + return pattern.sub(replacement, text) + + +# --------------------------------------------------------------------------- +# REPO-MAP +# --------------------------------------------------------------------------- + +TOP_LEVEL_DESCRIPTIONS = { + "openshorts": "Python package — all backend code lives here.", + "dashboard": "React + Vite frontend (out of scope for the current restructure).", + "remotion": "Remotion compositions (TypeScript) consumed by the render-service.", + "render-service": "Standalone TypeScript microservice that bundles + renders Remotion compositions.", + "fonts": "Committed TTFs (Noto Serif Bold) used by hook overlays.", + "scripts": "Developer tooling (update_claude_md.py, install_hooks.sh).", + "tests": "Pytest suite — unit, API contract, and e2e smoke.", + "uploads": "Runtime: incoming video uploads (gitignored).", + "output": "Runtime: generated clips and thumbnails (gitignored).", + "screenshots": "Repo screenshots used in README.md.", +} + + +def build_repo_map() -> str: + lines = ["| Folder | What it is |", "| --- | --- |"] + for entry in sorted(os.listdir(REPO_ROOT)): + full = REPO_ROOT / entry + if not full.is_dir(): + continue + if entry.startswith("."): + continue + if entry in {"__pycache__", ".venv", ".git", "node_modules"}: + continue + desc = TOP_LEVEL_DESCRIPTIONS.get(entry, "_(undocumented — add to TOP_LEVEL_DESCRIPTIONS in scripts/update_claude_md.py)_") + lines.append(f"| `{entry}/` | {desc} |") + return "\n".join(lines) + + +# --------------------------------------------------------------------------- +# MODULE-MAP +# --------------------------------------------------------------------------- + + +def parse_module(path: Path) -> Tuple[str, List[str]]: + """Return ``(one_line_docstring, public_symbols)`` for a Python module. + + Raises ``ValueError`` if the module has no docstring (so the pre-commit + hook fails the commit and the developer is forced to add one). 
+ """ + source = path.read_text(encoding="utf-8") + try: + tree = ast.parse(source) + except SyntaxError as e: + raise ValueError(f"{path}: syntax error — {e}") + + raw_doc = ast.get_docstring(tree) + if not raw_doc: + raise ValueError(f"{path}: missing module docstring") + + one_line = raw_doc.strip().splitlines()[0].strip() + + public: List[str] = [] + for node in tree.body: + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)): + if not node.name.startswith("_"): + public.append(node.name) + + return one_line, public + + +def build_module_map() -> Tuple[str, List[str]]: + """Walk the package and return ``(markdown_table, [errors])``.""" + rows: List[str] = ["| Module | Purpose | Public surface |", "| --- | --- | --- |"] + errors: List[str] = [] + + for path in sorted(PACKAGE_ROOT.rglob("*.py")): + rel = path.relative_to(REPO_ROOT) + try: + doc, symbols = parse_module(path) + except ValueError as e: + errors.append(str(e)) + continue + + if path.name == "__init__.py" and not symbols: + # Skip empty __init__.py rows to keep the table scannable; the doc + # is captured implicitly via the folder description in REPO-MAP. 
+ continue + + symbol_text = ", ".join(f"`{s}`" for s in symbols) if symbols else "_(none)_" + rows.append(f"| `{rel.as_posix()}` | {doc} | {symbol_text} |") + + return "\n".join(rows), errors + + +# --------------------------------------------------------------------------- +# ENV +# --------------------------------------------------------------------------- + +ENV_LINE_RE = re.compile(r"^([A-Z][A-Z0-9_]*)=([^\n]*)$") + + +def build_env_table() -> str: + if not ENV_EXAMPLE.exists(): + return "_(.env.example not found)_" + + rows: List[str] = ["| Variable | Default | Notes |", "| --- | --- | --- |"] + current_section = "" + + for raw_line in ENV_EXAMPLE.read_text(encoding="utf-8").splitlines(): + line = raw_line.rstrip() + # Section headers are written as `# --- Name -------------` + m = re.match(r"^#\s*-+\s*(.+?)\s*-+\s*$", line) + if m: + current_section = m.group(1) + continue + # Comment lines starting with `# VAR=` are documented optional vars. + m = re.match(r"^#\s*([A-Z][A-Z0-9_]*)=(.*)$", line) + if m: + name, default = m.group(1), m.group(2) + rows.append(f"| `{name}` | _(unset)_ | {current_section} (commented — optional) |") + continue + m = ENV_LINE_RE.match(line) + if m: + name, default = m.group(1), m.group(2) + shown_default = default if default else "_(empty — must set)_" + rows.append(f"| `{name}` | `{shown_default}` | {current_section} |") + + return "\n".join(rows) if len(rows) > 2 else "_(no env vars in .env.example)_" + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + + +def main() -> int: + parser = argparse.ArgumentParser(description="Regenerate auto-managed CLAUDE.md sections.") + parser.add_argument("--check", action="store_true", + help="Exit non-zero if CLAUDE.md would change (CI mode).") + args = parser.parse_args() + + if not CLAUDE_MD.exists(): + print(f"❌ {CLAUDE_MD} not found.", file=sys.stderr) + return 1 + + 
repo_map = build_repo_map() + module_map, errors = build_module_map() + env_table = build_env_table() + + if errors: + print("❌ Module docstrings missing — every .py file under openshorts/ " + "must start with a one-line module docstring:", file=sys.stderr) + for err in errors: + print(f" - {err}", file=sys.stderr) + return 2 + + current = CLAUDE_MD.read_text(encoding="utf-8") + updated = current + updated = replace_between(updated, *MARKERS["REPO-MAP"], repo_map) + updated = replace_between(updated, *MARKERS["MODULE-MAP"], module_map) + updated = replace_between(updated, *MARKERS["ENV"], env_table) + + if updated == current: + print("✓ CLAUDE.md already up to date.") + return 0 + + if args.check: + print("❌ CLAUDE.md is out of date — run `python scripts/update_claude_md.py` " + "to regenerate the auto-managed sections.", file=sys.stderr) + return 1 + + CLAUDE_MD.write_text(updated, encoding="utf-8") + print(f"✓ Rewrote {CLAUDE_MD.relative_to(REPO_ROOT)} " + f"(REPO-MAP, MODULE-MAP, ENV sections).") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From 726bfd3c7e72c6e5ac184825fdc5f8301ec99928 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Tue, 19 May 2026 16:05:12 -0400 Subject: [PATCH 16/43] docs(claude.md): rewrite with structured guidance + auto-managed sections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2: replace the old top-down listing with a tree-aware version that tells Claude (and humans) where new code lands. Sections (in order): - Project + quick start (docker compose / backend only / frontend only) - "Where things go" decision table — the heart of the file. 11 rows mapping intent ("add a new HTTP endpoint") to destination ("openshorts/routes/<domain>.py"). Plus the removal checklist. - Repo layout — top-level folders + backend-package subfolders with one-liner rules each. Top-level table is AUTO-MANAGED. 
- Module map — every .py under openshorts/ with its docstring + public surface. AUTO-MANAGED. - Processing pipeline — 11 stages with function-level references to the new module paths. - API surface — 12-row table; full inventory in the openapi snapshot. - Environment — AUTO-MANAGED from .env.example. - Conventions — six opinionated rules including the "single FFmpeg wrapper" and "every module has a docstring" rules that the pre-commit hook enforces mechanically. - Pointers, Tech stack. Auto-managed sections are filled by scripts/update_claude_md.py between marker comments (REPO-MAP, MODULE-MAP, ENV). Includes a small filter fix to exclude .egg-info / .dist-info from REPO-MAP. Tests stay 62/62 green. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- CLAUDE.md | 291 +++++++++++++++++++++++++----------- scripts/update_claude_md.py | 2 + 2 files changed, 205 insertions(+), 88 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 093db8af..a1e7ffa8 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,102 +1,217 @@ # CLAUDE.md -This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. +Guidance for Claude Code (and humans) when working with the OpenShorts repo. -## Project Overview +## Project -OpenShorts is an AI-powered vertical video generator that transforms long YouTube videos or local uploads into viral-ready short clips (9:16 format) for TikTok, Instagram Reels, and YouTube Shorts. Uses Google Gemini 2.0 Flash for viral moment detection and title generation. +OpenShorts is an AI-powered vertical short-video generator. It transforms +YouTube videos and local uploads into 9:16 viral clips for TikTok, Reels, +and Shorts. The pipeline uses Google Gemini for viral-moment detection and +title generation, faster-whisper for transcription, PySceneDetect for scene +boundaries, MediaPipe + YOLOv8 for face/person tracking, and FFmpeg for all +encoding/overlay/mux work. 
-## Development Commands +## Quick start -### Local Development (Docker) ```bash -docker compose up --build # Build and run full stack -``` -- Backend: http://localhost:8000 (FastAPI/Uvicorn) -- Frontend: http://localhost:5175 (Vite proxies API calls to backend) +# Full stack (backend + frontend + Remotion renderer) +docker compose up --build -### Frontend Only (Dashboard) -```bash -cd dashboard -npm install -npm run dev # Dev server with HMR (port 5173) -npm run build # Production build -npm run lint # ESLint (strict, --max-warnings 0) +# Backend only (local dev — needs Python 3.11+ and FFmpeg on PATH) +pip install -r requirements.txt -r requirements-dev.txt +pip install -e . +pytest -m "not e2e" # unit + API contract suite (~0.6s) +uvicorn openshorts.app:app --host 0.0.0.0 --port 8000 + +# Frontend only +cd dashboard && npm install && npm run dev ``` -### Backend Only +Install the CLAUDE.md auto-updater hook once after cloning: + ```bash -pip install -r requirements.txt -uvicorn app:app --host 0.0.0.0 --port 8000 +bash scripts/install_hooks.sh ``` -## Architecture - -### Core Processing Pipeline -1. **Ingest** - YouTube download (yt-dlp) or local upload -2. **Transcription** - faster-whisper with word-level timestamps -3. **Scene Detection** - PySceneDetect for segment boundaries -4. **AI Analysis** - Gemini identifies 3-15 viral moments (15-60 sec each) -5. **FFmpeg Extraction** - Precise clip cutting -6. **AI Cropping** - Vertical reframing with subject tracking -7. **Effects/Subtitles** - Optional AI-generated FFmpeg filters -8. **Hook Overlay** - Text overlays with styled fonts -9. **Voice Dubbing** - Optional ElevenLabs AI translation (30+ languages) -10. **S3 Backup** - Silent background upload -11. 
**Social Distribution** - Upload-Post API (async upload) - -### Key Files -| File | Purpose | -|------|---------| -| `main.py` | Core video processing: transcription, scene detection, clip extraction, vertical reframing | -| `app.py` | FastAPI server with async job queue and REST endpoints | -| `editor.py` | Gemini AI integration for dynamic video effects (FFmpeg filter generation) | -| `hooks.py` | Hook text overlay generation with font rendering | -| `s3_uploader.py` | AWS S3 upload with caching | -| `subtitles.py` | SRT generation, FFmpeg subtitle burning, and dubbed video transcription | -| `translate.py` | ElevenLabs dubbing API for AI voice translation | -| `dashboard/src/App.jsx` | Main React component with state management | -| `dashboard/src/components/TranslateModal.jsx` | Voice dubbing UI with language selection | - -### Dual-Mode Video Reframing -- **TRACK Mode** (single subject): MediaPipe face detection + YOLOv8 fallback with "Heavy Tripod" stabilization -- **GENERAL Mode** (groups/landscapes): Blurred background layout preserving full width - -### Key Classes -- `SmoothedCameraman` - Stabilized camera movement with safe zone logic (prevents jitter) -- `SpeakerTracker` - Prevents rapid speaker switching, handles temporary occlusions - -### API Endpoints +## Where things go (decision table) + +When you want to **add** something, this is where it lands: + +| If you want to add… | Drop it in… | Notes | +| --- | --- | --- | +| A new HTTP endpoint | `openshorts/routes/<domain>.py` + register in `openshorts/app.py` | The router split from `app.py` is in flight; until it ships, edit `app.py` directly. | +| A new FFmpeg operation | `openshorts/video/ffmpeg.py` | Never call `subprocess.run(['ffmpeg', ...])` outside this module. | +| A new external service client | `openshorts/integrations/<service>.py` | Each one exposes a typed Python client. | +| A new AI model / inference call | `openshorts/ml/<purpose>.py` | Detection, transcription, viral extraction, etc. 
| +| A new layout template | `openshorts/layouts/<name>.py` | Subclass `Layout` (see [ROADMAP.md](ROADMAP.md) feature B). | +| A new motion-graphic effect | `openshorts/motion_graphics/library/<name>.py` | Subclass `MotionGraphicEffect` (see [ROADMAP.md](ROADMAP.md) feature C). | +| A new audio mixer / SFX | `openshorts/audio/<concern>.py` | See [ROADMAP.md](ROADMAP.md) feature A. | +| A new Gemini prompt | `openshorts/prompts/<name>.md` or `openshorts/editing/prompts.py` | Externalize prompts; don't bury them in handler code. | +| A new Pydantic schema | `openshorts/models/<domain>.py` | One file per request/response domain. | +| A new shared FFmpeg / filter helper | `openshorts/utils/filters.py` | Already used by editing + future motion-graphics compositor. | +| A new core infrastructure piece | `openshorts/core/<concern>.py` | Job queue, job store, api-key resolver, logging. | + +When you want to **remove** something: + +1. Delete the route file (or the function within it). +2. `grep -r <removed_name>` to find dead imports. +3. Delete the corresponding Pydantic model in `openshorts/models/` if any. +4. Delete or update tests that reference it. +5. Run `python scripts/update_claude_md.py` (the pre-commit hook will do this for you). + +## Repo layout + +The top-level folders. **The table below is auto-managed by `scripts/update_claude_md.py`** — never edit it by hand. + +<!-- AUTO:REPO-MAP:START --> +| Folder | What it is | +| --- | --- | +| `dashboard/` | React + Vite frontend (out of scope for the current restructure). | +| `fonts/` | Committed TTFs (Noto Serif Bold) used by hook overlays. | +| `openshorts/` | Python package — all backend code lives here. | +| `output/` | Runtime: generated clips and thumbnails (gitignored). | +| `remotion/` | Remotion compositions (TypeScript) consumed by the render-service. | +| `render-service/` | Standalone TypeScript microservice that bundles + renders Remotion compositions. 
| +| `screenshots/` | Repo screenshots used in README.md. | +| `scripts/` | Developer tooling (update_claude_md.py, install_hooks.sh). | +| `tests/` | Pytest suite — unit, API contract, and e2e smoke. | +| `uploads/` | Runtime: incoming video uploads (gitignored). | +<!-- AUTO:REPO-MAP:END --> + +### Backend package (`openshorts/`) + +The Python package follows classical layered conventions. Each subfolder +has a one-line purpose statement in its `__init__.py`. + +| Folder | Rule | +| --- | --- | +| `openshorts/core/` | Cross-cutting infra: job queue, job store, API-key resolver, logging. | +| `openshorts/routes/` | FastAPI routers, one module per API domain. | +| `openshorts/video/` | All video work goes here. **FFmpeg only via `video/ffmpeg.py`.** | +| `openshorts/ml/` | AI inference: face/person detection, transcription, viral extraction. | +| `openshorts/audio/` | Future feature A — soundtracks + ducking. | +| `openshorts/layouts/` | Future feature B — layout templates (panorama, educational, etc.). | +| `openshorts/motion_graphics/` | Future feature C — animated overlays + multi-effect compositor. | +| `openshorts/editing/` | AI-generated FFmpeg filter pipeline. | +| `openshorts/overlays/` | Hook cards + subtitle generation / burn-in. | +| `openshorts/ingest/` | YouTube downloads + local upload handling. | +| `openshorts/saas/` | SaaSShorts UGC pipeline (research → script → media → composite). | +| `openshorts/integrations/` | External-service clients (S3, ElevenLabs, fal.ai, Upload-Post). | +| `openshorts/thumbnails/` | YouTube thumbnail workflow (titles, images, descriptions). | +| `openshorts/prompts/` | Externalized Gemini prompt templates. | +| `openshorts/models/` | Pydantic request/response schemas grouped by domain. | +| `openshorts/utils/` | Shared helpers: filter sanitization, path utilities. | + +## Module map + +Every Python module under `openshorts/` and its public surface. 
**Auto-managed** — regenerated by the pre-commit hook from each file's docstring. + +<!-- AUTO:MODULE-MAP:START --> +| Module | Purpose | Public surface | +| --- | --- | --- | +| `openshorts/app.py` | FastAPI application entrypoint for the openshorts package. | _(none)_ | +| `openshorts/editing/ai_filters.py` | VideoEditor: Gemini-driven FFmpeg filter generation and application. | `VideoEditor` | +| `openshorts/editing/prompts.py` | Gemini prompt templates for AI video-effect generation. | `build_ffmpeg_filter_prompt`, `build_effects_config_prompt` | +| `openshorts/ingest/youtube.py` | YouTube downloader with bot-detection workarounds (yt-dlp + cookies + alt clients). | `sanitize_filename`, `download_youtube_video` | +| `openshorts/integrations/elevenlabs.py` | ElevenLabs Dubbing API client: AI voice translation across 30+ languages. | `create_dubbing_project`, `get_dubbing_status`, `download_dubbed_video`, `translate_video`, `get_supported_languages` | +| `openshorts/integrations/s3.py` | AWS S3 client: clip uploads, actor gallery, UGC video gallery, presigned URLs. | `upload_file_to_s3`, `get_s3_client`, `generate_presigned_url`, `list_all_clips`, `upload_actor_to_s3`, `list_actor_gallery`, `upload_video_to_gallery`, `list_video_gallery`, `upload_job_artifacts` | +| `openshorts/ml/detection.py` | Face and person detection: MediaPipe BlazeFace (primary) + YOLOv8 (fallback). | `detect_face_candidates`, `detect_person_yolo` | +| `openshorts/ml/transcription.py` | faster-whisper transcription: CPU-optimized (INT8 quantization) with word timestamps. | `transcribe_video` | +| `openshorts/ml/viral_extraction.py` | Gemini 2.5 Flash viral-moment extraction: picks 3-15 short clips from a transcript. | `get_viral_clips` | +| `openshorts/overlays/hooks.py` | Hook text overlays: PIL-rendered cards (PNG) burned onto video via FFmpeg. 
| `download_font_if_needed`, `create_hook_image`, `add_hook_to_video` | +| `openshorts/overlays/subtitles_generate.py` | SRT subtitle generation: transcription and word-level grouping into short lines. | `transcribe_audio`, `generate_srt_from_video`, `generate_srt`, `format_srt_block` | +| `openshorts/overlays/subtitles_render.py` | Subtitle burn-in: FFmpeg subtitles filter + ASS color/style conversion. | `hex_to_ass_color`, `burn_subtitles` | +| `openshorts/saas/pipeline.py` | SaaSShorts: AI-powered UGC video generator for SaaS products. | `research_saas_online`, `scrape_website`, `analyze_saas`, `generate_scripts`, `generate_actor_images`, `generate_actor_image`, `generate_voiceover`, `get_elevenlabs_voices`, `generate_talking_head`, `generate_talking_head_lowcost`, `generate_broll`, `transcribe_audio_for_subs`, `generate_tiktok_subs`, `generate_srt_from_script`, `composite_video`, `generate_full_video` | +| `openshorts/thumbnails/descriptions.py` | YouTube description + chapter-marker generation from transcript segments. | `generate_youtube_description` | +| `openshorts/thumbnails/images.py` | Thumbnail image generation via Gemini multimodal image preview model. | `generate_thumbnail` | +| `openshorts/thumbnails/titles.py` | Gemini-driven viral title generation and conversational refinement. | `analyze_video_for_titles`, `refine_titles` | +| `openshorts/utils/filters.py` | Shared FFmpeg filter helpers: chain splitting, sanitization, zoompan size enforcement. | `split_filter_chain`, `enforce_zoompan_output_size`, `sanitize_filter_string` | +| `openshorts/video/ffmpeg.py` | Single FFmpeg wrapper for the entire codebase. | `FFmpegError`, `run`, `probe_resolution`, `probe_duration`, `cut`, `extract_audio`, `mux_video_audio`, `overlay_png`, `build_filter_complex` | +| `openshorts/video/pipeline.py` | process_video_to_vertical orchestrator: scenes -> strategy -> per-frame crop -> mux. 
| `process_video_to_vertical` | +| `openshorts/video/reframing.py` | Vertical reframing helpers: blurred-background 'General Shot' composite. | `create_general_frame` | +| `openshorts/video/scene_analysis.py` | PySceneDetect scene boundaries + per-scene TRACK/GENERAL strategy analysis. | `detect_scenes`, `get_video_resolution`, `analyze_scenes_strategy` | +| `openshorts/video/tracking.py` | SmoothedCameraman and SpeakerTracker: the heart of stabilized vertical reframing. | `SmoothedCameraman`, `SpeakerTracker` | +<!-- AUTO:MODULE-MAP:END --> + +## Processing pipeline + +1. **Ingest** — `openshorts/ingest/youtube.py:download_youtube_video()` or a local upload. +2. **Transcribe** — `openshorts/ml/transcription.py:transcribe_video()` (faster-whisper, word timestamps). +3. **Scene-detect** — `openshorts/video/scene_analysis.py:detect_scenes()` (PySceneDetect). +4. **Viral extraction** — `openshorts/ml/viral_extraction.py:get_viral_clips()` (Gemini 2.5 Flash picks 3–15 clips, 15–60 s each). +5. **Cut clips** — FFmpeg `-ss`/`-to` per clip. +6. **Strategy** — `openshorts/video/scene_analysis.py:analyze_scenes_strategy()` decides TRACK vs GENERAL per scene. +7. **Reframe** — `openshorts/video/pipeline.py:process_video_to_vertical()` runs the per-frame loop. +8. **Effects** (optional) — `openshorts/editing/ai_filters.py:VideoEditor` injects Gemini-generated FFmpeg filters. +9. **Hooks + subtitles** (optional) — `openshorts/overlays/`. +10. **Translate** (optional) — `openshorts/integrations/elevenlabs.py:translate_video()` dubs into 30+ languages. +11. **Backup + distribute** — `openshorts/integrations/s3.py` + `openshorts/integrations/upload_post.py` (planned). 
+ +## API surface + | Method | Route | Purpose | -|--------|-------|---------| -| POST | `/api/process` | Submit video for processing | -| GET | `/api/status/{job_id}` | Poll job status and logs | -| POST | `/api/edit` | Apply AI video effects | -| POST | `/api/subtitle` | Generate and apply subtitles (auto-transcribes dubbed videos) | -| POST | `/api/hook` | Add text hook overlays | -| POST | `/api/translate` | AI voice dubbing via ElevenLabs | -| GET | `/api/translate/languages` | List supported dubbing languages | -| POST | `/api/social/post` | Post to social media (async upload) | - -### Concurrency Model -Async job queue with semaphore-based concurrency control. Configure via `MAX_CONCURRENT_JOBS` env var (default: 5). Jobs auto-cleanup after 1 hour. - -## Environment Variables - -**Server-side (.env):** -- `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_REGION`, `AWS_S3_BUCKET` - For S3 backup -- `MAX_CONCURRENT_JOBS` - Concurrent processing limit (default: 5) -- `VITE_API_URL` - Production API URL override - -**Client-side (localStorage, encrypted):** -- `GEMINI_API_KEY` - Google Gemini API key (required) -- `ELEVENLABS_API_KEY` - ElevenLabs API key for voice dubbing (optional) -- `UPLOAD_POST_API_KEY` - Upload-Post API key for social posting (optional) - -> API keys are stored encrypted in the browser and sent via headers only when needed. Never stored server-side. - -## Tech Stack -- **Backend:** Python 3.11, FastAPI, google-genai, faster-whisper, ultralytics (YOLOv8), mediapipe, opencv-python, yt-dlp, FFmpeg, httpx -- **Frontend:** React 18, Vite 4, Tailwind CSS 3.4 -- **External APIs:** Google Gemini, ElevenLabs Dubbing, Upload-Post -- **Infrastructure:** Docker + Docker Compose, AWS S3 +| --- | --- | --- | +| POST | `/api/process` | Submit a video (URL or upload) for processing. | +| GET | `/api/status/{job_id}` | Poll status + logs. | +| POST | `/api/edit` | Apply Gemini-generated FFmpeg filters to a clip. 
| +| POST | `/api/effects/generate` | Get a structured EffectsConfig for Remotion. | +| POST | `/api/render/{render_id}` | Render via the Remotion microservice. | +| POST | `/api/subtitle` | Generate + burn subtitles. Auto-transcribes dubbed videos. | +| POST | `/api/hook` | Burn a text-hook PNG onto a clip. | +| POST | `/api/translate` | AI voice dubbing via ElevenLabs. | +| GET | `/api/translate/languages` | List supported languages. | +| POST | `/api/social/post` | Distribute via Upload-Post. | +| POST | `/api/thumbnail/*` | YouTube thumbnail workflow (titles, images, descriptions). | +| POST | `/api/saasshorts/*` | SaaS UGC pipeline. | + +The full route inventory (32 endpoints) is locked in `tests/snapshots/baseline.openapi.json`. + +## Environment + +Server-side env vars the code actually reads. **Auto-managed** — generated from `.env.example`. + +<!-- AUTO:ENV:START --> +| Variable | Default | Notes | +| --- | --- | --- | +| `GEMINI_API_KEY` | `_(empty — must set)_` | Required (server-side reads via os.getenv) | +| `AWS_ACCESS_KEY_ID` | `_(empty — must set)_` | Optional: AWS S3 (clip backup + public gallery) | +| `AWS_SECRET_ACCESS_KEY` | `_(empty — must set)_` | Optional: AWS S3 (clip backup + public gallery) | +| `AWS_REGION` | `eu-west-3` | Optional: AWS S3 (clip backup + public gallery) | +| `AWS_S3_BUCKET` | `_(empty — must set)_` | Optional: AWS S3 (clip backup + public gallery) | +| `AWS_S3_PUBLIC_BUCKET` | `_(empty — must set)_` | Optional: AWS S3 (clip backup + public gallery) | +| `DISABLE_YOUTUBE_URL` | `false` | Optional: YouTube ingestion | +| `YOUTUBE_COOKIES` | _(unset)_ | Optional: YouTube ingestion (commented — optional) | +| `RENDER_SERVICE_URL` | `http://renderer:3100` | Optional: Remotion render service | +| `MAX_CONCURRENT_JOBS` | `5` | Tuning | +| `VITE_API_URL` | `http://localhost:8000` | Tuning | +| `VITE_ENCRYPTION_KEY` | _(unset)_ | Tuning (commented — optional) | +| `ELEVENLABS_API_KEY` | _(unset)_ | Tuning (commented — optional) | 
+| `UPLOAD_POST_API_KEY` | _(unset)_ | Tuning (commented — optional) | +| `FAL_KEY` | _(unset)_ | Tuning (commented — optional) | +<!-- AUTO:ENV:END --> + +ElevenLabs / Upload-Post / fal.ai keys are **client-side** (encrypted in browser localStorage, sent per-request via headers). They are NOT read from `.env`. + +## Conventions + +1. **Single FFmpeg wrapper.** Every `subprocess.run(['ffmpeg', ...])` call should funnel through `openshorts/video/ffmpeg.py`. Migration of existing callers is incremental — but new code must use the wrapper. +2. **API keys via headers, not env.** Client-side keys (Gemini, ElevenLabs, Upload-Post, fal.ai) arrive on each request as `X-...-Key`. The resolver helper for these lives in `openshorts/core/api_keys.py` (planned). Do NOT call `request.headers.get('X-...')` outside that file. +3. **Prompts as files.** New Gemini prompts go in `openshorts/prompts/<name>.md` and are loaded by name. Editing-domain prompts may stay inline in `openshorts/editing/prompts.py`. +4. **Every module starts with a docstring.** The pre-commit hook (`scripts/update_claude_md.py`) fails the commit if any `.py` file under `openshorts/` lacks one. Use a single line — it becomes the row in the auto-managed module map. +5. **Tests first.** A characterization test suite (`tests/`) was written *before* the restructure. Anything that touches behavior should keep `pytest -m "not e2e"` 100% green. The OpenAPI snapshot in `tests/snapshots/baseline.openapi.json` pins the public API. +6. **No new global dicts in routers.** Job state goes through `openshorts/core/job_store.py` (planned). Today, `app.py` still owns these dicts — keep them centralized there until the routers are split out. + +## Pointers + +- `ROADMAP.md` — designs for the three upcoming features (motion graphics, soundtracks, layouts) and deferred refactors (router split, FFmpeg-wrapper migration, saasshorts internal split). 
+- `scripts/update_claude_md.py` — what regenerates the auto-managed sections of this file. +- `scripts/install_hooks.sh` — one-liner to wire up the pre-commit hook. +- `tests/snapshots/baseline.openapi.json` — the contract that any backend change must keep green. +- `dashboard/` — the React/Vite frontend (deliberately out of scope for the current restructure). + +## Tech stack + +- **Backend:** Python 3.11, FastAPI, google-genai, faster-whisper, ultralytics (YOLOv8), mediapipe, opencv-python, yt-dlp, FFmpeg, httpx. +- **Frontend:** React 18, Vite 4, Tailwind CSS 3.4. +- **External:** Google Gemini, ElevenLabs Dubbing, Upload-Post, fal.ai (Flux + Kling), Remotion. +- **Infra:** Docker + Docker Compose, AWS S3. diff --git a/scripts/update_claude_md.py b/scripts/update_claude_md.py index 4602f3fa..62e7d3f0 100755 --- a/scripts/update_claude_md.py +++ b/scripts/update_claude_md.py @@ -106,6 +106,8 @@ def build_repo_map() -> str: continue if entry in {"__pycache__", ".venv", ".git", "node_modules"}: continue + if entry.endswith(".egg-info") or entry.endswith(".dist-info"): + continue desc = TOP_LEVEL_DESCRIPTIONS.get(entry, "_(undocumented — add to TOP_LEVEL_DESCRIPTIONS in scripts/update_claude_md.py)_") lines.append(f"| `{entry}/` | {desc} |") return "\n".join(lines) From 9e68944d18a7b35d4209f9f439b230ae92f52d67 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Tue, 19 May 2026 16:05:39 -0400 Subject: [PATCH 17/43] docs(claude.md): add per-folder sub-CLAUDE.md stubs for high-rule areas MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five small CLAUDE.md files at directory boundaries. 
Each carries the one rule that's easy to violate when working inside that subtree: - openshorts/video/ — FFmpeg only via ffmpeg.py - openshorts/layouts/ — subclass Layout; don't bypass in callers - openshorts/motion_graphics/ — register effects + batch via compositor - openshorts/audio/ — never mix audio inside video/ - openshorts/prompts/ — one .md per prompt; loaded by name Per the brainstorming / web-research guidance the user requested: sub-CLAUDE.md files at directory boundaries keep guidance scoped and discoverable without bloating the root CLAUDE.md. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- openshorts/audio/CLAUDE.md | 7 +++++++ openshorts/layouts/CLAUDE.md | 11 +++++++++++ openshorts/motion_graphics/CLAUDE.md | 10 ++++++++++ openshorts/prompts/CLAUDE.md | 10 ++++++++++ openshorts/video/CLAUDE.md | 5 +++++ 5 files changed, 43 insertions(+) create mode 100644 openshorts/audio/CLAUDE.md create mode 100644 openshorts/layouts/CLAUDE.md create mode 100644 openshorts/motion_graphics/CLAUDE.md create mode 100644 openshorts/prompts/CLAUDE.md create mode 100644 openshorts/video/CLAUDE.md diff --git a/openshorts/audio/CLAUDE.md b/openshorts/audio/CLAUDE.md new file mode 100644 index 00000000..b921ecd7 --- /dev/null +++ b/openshorts/audio/CLAUDE.md @@ -0,0 +1,7 @@ +# `openshorts/audio/` + +All audio mixing happens here. Never edit audio inside `openshorts/video/` +— if a feature is "audio that interacts with video timing", the audio side +of it lives here and the video side calls in. + +See `ROADMAP.md` (feature A) for the soundtracks + ducking design. diff --git a/openshorts/layouts/CLAUDE.md b/openshorts/layouts/CLAUDE.md new file mode 100644 index 00000000..d3b82e89 --- /dev/null +++ b/openshorts/layouts/CLAUDE.md @@ -0,0 +1,11 @@ +# `openshorts/layouts/` + +Layout templates control how a vertical clip is composed (single-subject +panorama, two-pane educational, side-by-side, picture-in-picture). 
Each +layout subclasses `Layout` from `base.py` and exposes a single +`render_frame(frame, detections, frame_number)` method. The pipeline's +hot loop calls that — don't bypass it with raw cv2 in routers or other +high-level callers. + +See `ROADMAP.md` (feature B) for the migration plan from today's inline +TRACK/GENERAL branching to polymorphic layouts. diff --git a/openshorts/motion_graphics/CLAUDE.md b/openshorts/motion_graphics/CLAUDE.md new file mode 100644 index 00000000..7ea73a74 --- /dev/null +++ b/openshorts/motion_graphics/CLAUDE.md @@ -0,0 +1,10 @@ +# `openshorts/motion_graphics/` + +Animated overlays (lower-thirds, callouts, progress bars, animated emoji, +etc.). Each effect subclasses `MotionGraphicEffect` from `base.py` and is +registered in `library/__init__.py`. The compositor in `compositor.py` +batches a timeline of effects into a single `filter_complex` chain so the +video is encoded only once. + +See `ROADMAP.md` (feature C) — this ships first because it's the +prerequisite for the audio mixer's batching pattern. diff --git a/openshorts/prompts/CLAUDE.md b/openshorts/prompts/CLAUDE.md new file mode 100644 index 00000000..a6817481 --- /dev/null +++ b/openshorts/prompts/CLAUDE.md @@ -0,0 +1,10 @@ +# `openshorts/prompts/` + +Externalized Gemini prompt templates. Convention: one `.md` file per +prompt, loaded by name via the package-level loader (planned). Supports +`{{var}}` placeholder substitution at load time. Keeping prompts as files +(rather than string literals buried inside handlers) lets non-engineers +iterate on them and makes prompt-history diffs readable. + +Editing-domain prompts that need heavy `f"..."` interpolation may stay in +`openshorts/editing/prompts.py` instead. diff --git a/openshorts/video/CLAUDE.md b/openshorts/video/CLAUDE.md new file mode 100644 index 00000000..d401d692 --- /dev/null +++ b/openshorts/video/CLAUDE.md @@ -0,0 +1,5 @@ +# `openshorts/video/` + +All video processing. 
**FFmpeg is invoked only through `ffmpeg.py`.** Never +call `subprocess.run(['ffmpeg', ...])` directly from a module in this folder +or any caller of it — funnel through the wrapper. From 1dd4b9abe7417c26713032340c58e7c6e4055167 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Tue, 19 May 2026 16:07:18 -0400 Subject: [PATCH 18/43] docs(roadmap): design future features + document deferred refactors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 5: write ROADMAP.md with three feature designs and a candid account of what was deferred from Phase 1. Ordering rationale (lowest blast radius first): 1. Motion Graphics Library — reuses overlay pattern, ships first because its compositor is the prerequisite for A's audio batching. 2. Background Soundtracks + Ducking — self-contained at the audio layer once the FFmpeg wrapper is migrated. 3. Layout Templates (educational, side-by-side, picture-in-picture) — last because it touches the per-frame loop in pipeline.py. Each feature section includes: rationale, architecture sketch, files to add, integration points (referencing the new module paths from the restructure), API surface, and risks. Deferred refactors documented honestly: - Full router split of app.py - subprocess.run -> openshorts/video/ffmpeg.py migration - Internal split of openshorts/saas/pipeline.py - openshorts/core/{job_store,api_keys}.py extraction - Frontend restructure (always out of scope this round) Plus the commit log of what landed in this restructure and the revert point: `git reset --hard pre-restructure-20260519-1526`. 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- ROADMAP.md | 259 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 259 insertions(+) create mode 100644 ROADMAP.md diff --git a/ROADMAP.md b/ROADMAP.md new file mode 100644 index 00000000..d07b080e --- /dev/null +++ b/ROADMAP.md @@ -0,0 +1,259 @@ +# ROADMAP + +Designs and ordering for the three future features the user asked about +during the restructure planning, plus the refactors deliberately deferred +out of the restructure phase so it could ship safely. + +The headline rule: **everything below depends on the package structure that +already shipped in Phase 1, plus the single-FFmpeg-wrapper convention.** +Each feature is sized so that it can land in a small handful of atomic +commits with the `pytest -m "not e2e"` suite green between commits. + +--- + +## Ordering (lowest blast radius first) + +1. **Feature C — Motion Graphics Library.** Reuses the proven + FFmpeg-overlay pattern from `openshorts/overlays/hooks.py`. No changes + to the pipeline hot loop. **Ships first** because the compositor it + introduces is the prerequisite for feature A's audio batching. +2. **Feature A — Background Soundtracks + SFX with Ducking.** Self-contained + at the audio layer once C's compositor exists. Integrates at the + single audio-mux step in `openshorts/video/pipeline.py` — small + surface area, but it needs the FFmpeg wrapper migration (below) done. +3. **Feature B — Layout Templates.** Last because it touches the hottest + loop in the codebase. Once C and A have landed, layouts is a clean + polymorphism extraction with no need to also be inventing infra. + +The three deferred refactors interleave naturally: + +- Before **A**: finish migrating every `subprocess.run(['ffmpeg', ...])` call to + `openshorts/video/ffmpeg.py` (Phase 1.10 leftover). 
+- Before or alongside **B**: split `app.py` into the eleven planned routers + under `openshorts/routes/` and centralize job state in + `openshorts/core/job_store.py` (Phase 1.9 leftover). +- Independently: split `openshorts/saas/pipeline.py` into the five planned + modules (research / scripting / media / compositing / pipeline) (Phase 1.8 + leftover). + +--- + +## Feature C — Motion Graphics Library + +### Why first + +The hook-overlay code in `openshorts/overlays/hooks.py:add_hook_to_video()` +already proves out the pattern: render PNG via PIL, burn onto video via +FFmpeg `overlay` filter. Generalizing that to "a library of effects, each +rendered to a PNG sequence or alpha .mov, then composited in one ffmpeg +invocation" is a small extension. No changes to the per-frame loop. + +### Architecture + +``` +openshorts/motion_graphics/ +├── base.py +│ class MotionGraphicEffect(ABC): +│ def render(self, duration_sec, fps, out_dir) -> Path # returns PNG seq or .mov with alpha +│ def get_overlay_filter(self, start_sec, end_sec, w, h) -> str # the FFmpeg filter chain +│ +├── compositor.py +│ class MotionGraphicsCompositor: +│ def add(self, effect: MotionGraphicEffect, start_sec, end_sec): ... +│ def render(self, input_video, output_video): +│ # 1. ask each effect for its PNG/mov +│ # 2. build ONE filter_complex chain ([0:v][1:v]overlay=...[v1];[v1][2:v]overlay=...[v2];...) +│ # 3. invoke openshorts.video.ffmpeg.run(...) 
ONCE — single re-encode +│ +└── library/ + ├── lower_thirds.py class LowerThirdsEffect + ├── callout.py class CalloutEffect + ├── progress_bar.py class ProgressBarEffect + └── animated_emoji.py class AnimatedEmojiEffect +``` + +### Files to add + +- `openshorts/motion_graphics/base.py` +- `openshorts/motion_graphics/compositor.py` +- `openshorts/motion_graphics/library/{lower_thirds,callout,progress_bar,animated_emoji}.py` +- `openshorts/routes/motion_graphics.py` — `GET /api/motion-graphics/library` (lists effects + thumbnails) and `POST /api/motion-graphics/render` (apply a timeline) +- `openshorts/models/motion_graphics.py` — Pydantic schemas (`EffectInstance`, `RenderTimeline`, etc.) +- Frontend: a `MotionGraphicsModal.jsx` matching the existing `HookModal` / `SubtitleModal` pattern (defer until UI work is in scope) + +### Integration + +The compositor sits *after* the vertical-reframing step and *before* the +audio mux in `openshorts/video/pipeline.py`. Easiest way to wire it in +is to make `process_video_to_vertical()` accept an optional +`motion_graphics_timeline` argument and, if present, route the +silent-video output through the compositor before the audio merge. + +### Risks the pipeline analysis flagged + +- **Re-encoding per overlay.** Mitigated by the compositor building a + single `filter_complex` chain — the video is decoded and re-encoded + exactly once regardless of how many effects are applied. +- **PNG-sequence disk usage.** Each effect writes its frames to a per-clip + temp dir under `output/<job_id>/_mg/`; cleaned up after the final mux. + +--- + +## Feature A — Background Soundtracks + SFX with Ducking + +### Why second + +Logically independent of layouts. Needs the FFmpeg wrapper done so +the mixer can compose `amix` + `volume` + `silencedetect` chains cleanly. + +### Architecture + +``` +openshorts/audio/ +├── mixer.py +│ def mix_audio_tracks(original_audio, music_track, sfx_cues, output, ducking_db=-18): +│ # 1. 
Detect speech intervals via Whisper word timings (already cached in metadata.json) +│ # OR via FFmpeg silencedetect if no transcript available. +│ # 2. Build a `volume` filter on the music track with `enable=between(t,...)` per speech interval. +│ # 3. amix=inputs=2 (original + ducked music) + each SFX cue at its trigger time. +│ # 4. Funnel through openshorts.video.ffmpeg.run(...). +│ +├── library.py +│ def list_tracks(genre=None, mood=None, length_sec=None) -> list[TrackMeta] +│ # Reads assets/music/manifest.json — committed file listing tracks under assets/music/ +│ +└── cues.py + def generate_sfx_cues(transcript, gemini_key) -> list[SfxCue] + # Gemini analyzes transcript to suggest SFX moments (zoom-ins, scene changes, hook delivery). + # Prompt lives at openshorts/prompts/sfx_cues.md. +``` + +### Files to add + +- `openshorts/audio/mixer.py` +- `openshorts/audio/library.py` +- `openshorts/audio/cues.py` +- `openshorts/prompts/sfx_cues.md` +- `openshorts/routes/audio.py` — `POST /api/audio/apply` +- `openshorts/models/audio.py` +- `assets/music/manifest.json` + a small set of CC-licensed tracks (or stub manifest + user uploads in v1) + +### Integration + +Inside `openshorts/video/pipeline.py:process_video_to_vertical()` at the +existing audio-mux step (today around the `merge_command` block). The +audio mixer takes the original audio from `temp_audio_output`, mixes in +the soundtrack + cues, and writes the mixed audio back over the +intermediate file before the final mux. The video side never sees this. + +### Risks + +- **Speech-detection accuracy.** When word timings are unreliable + (background noise, music in the source), fall back to FFmpeg + `silencedetect=n=-30dB:d=0.5` to bracket speech intervals. +- **Music licensing.** v1 ships with placeholder royalty-free files + under `assets/music/`. v2 can swap in an Epidemic Sound / Artlist + client behind `openshorts/integrations/`. 
+ +--- + +## Feature B — Layout Templates + +### Why last + +Touches the per-frame loop in `openshorts/video/pipeline.py`. The other +two features add new boxes alongside the loop; this one rewrites how the +loop branches. Biggest blast radius — best to land it after C and A are +shipped and the test suite has shaken out any edge cases. + +### Architecture + +``` +openshorts/layouts/ +├── base.py +│ class Layout(ABC): +│ def __init__(self, output_w, output_h, video_w, video_h, fps): ... +│ def render_frame(self, frame, detections, frame_number) -> np.ndarray +│ def on_scene_change(self, scene_index): ... # for cameramen / trackers to snap +│ +├── vertical_panorama.py class VerticalPanoramaLayout # today's TRACK / GENERAL behavior, polymorphic +├── educational.py class EducationalLayout # top half = source content, bottom = presenter headshot +└── side_by_side.py class SideBySideLayout # stub for the next variant +``` + +### Files to add + +- `openshorts/layouts/base.py`, `vertical_panorama.py`, `educational.py`, `side_by_side.py` +- `openshorts/routes/layouts.py` — `layout` field accepted on `POST /api/process`; later `POST /api/layout/reapply` to swap layout on an existing job's clips without re-transcribing +- `openshorts/models/layouts.py` + +### Pipeline change + +The branching at the heart of `process_video_to_vertical()` (the +`if current_strategy == 'GENERAL': ... else: ...` block) becomes: + +```python +layout: Layout = layout_registry.get(request.layout) # default: VerticalPanoramaLayout +# ... in the frame loop: +output_frame = layout.render_frame(frame, detections, frame_number) +``` + +`VerticalPanoramaLayout` wraps today's `SmoothedCameraman` + +`SpeakerTracker` + `create_general_frame()` exactly as they are — the +restructure already kept those in their own modules precisely to +support this. 
+ +`EducationalLayout` owns *two* cameramen — one for the source content +(top half, treated as a screencast crop) and one for the presenter face +(bottom half, tight headshot crop using `detect_face_candidates`). +At each frame, both crops are computed and stacked vertically. If no +face is detected for the presenter slot, the layout falls back to vertical panorama +for that segment. + +### Risks + +- **Per-frame cost.** Two cameramen + two crops doubles the + detection / transform cost. Mitigation: detect once per frame; both + cameramen consume the same `detections` list. +- **Layout-change-mid-clip.** Out of scope for v1 — layout is fixed for + the whole clip. v2 could allow per-scene layout swaps. + +--- + +## Deferred refactors (Phase 1 leftovers) + +| Refactor | Why deferred | Plan | +| --- | --- | --- | +| Full router split of `app.py` | 2256 lines / 32 routes; doing it as one pass would have been risky given the test suite mocks heavy ML deps at the module-import boundary. | Split per the plan: 11 routers under `openshorts/routes/` + `create_app()` factory in `openshorts/app.py`. One router per commit. The OpenAPI snapshot in `tests/snapshots/baseline.openapi.json` is the gate — it must stay byte-identical except when a route is deliberately changed. | +| Migrate every `subprocess.run(['ffmpeg', ...])` to `openshorts/video/ffmpeg.py` | Many call sites (app.py, video/pipeline.py, overlays/*, editing/ai_filters.py, saas/pipeline.py). Migrating all of them in one pass would have ballooned the restructure commit set. | One caller per commit. Tests between. The hook overlay in `overlays/hooks.py:add_hook_to_video()` is a good first migration — small, well-tested. | +| Internal split of `openshorts/saas/pipeline.py` | 1474-line file. No direct test coverage (only via the OpenAPI contract). Splitting it carries risk without the safety net of tests. 
| Per the original plan: `saas/research.py` (scraping + analyze), `saas/scripting.py`, `saas/media.py` (fal.ai + ElevenLabs TTS), `saas/compositing.py`, `saas/pipeline.py` (orchestrator). Add focused unit tests for the research + scripting + compositing layers as you split them. | +| `openshorts/core/job_store.py` + `api_keys.py` resolver | Today the job-state dicts (`jobs`, `thumbnail_sessions`, `publish_jobs`, `saas_jobs`) live as globals in `app.py`. The router split is a natural place to extract them. | Land alongside the router split, not before — extracting them prematurely just shifts where the globals live without delivering value. | +| Frontend restructure | Explicitly out of scope per the planning Q&A — frontend changes are deferred to a separate round. | When the user is ready: split `dashboard/src/App.jsx` along the same modal-per-feature axes as the backend routes, and introduce a centralized api client. | + +--- + +## What landed in this restructure + +For posterity. Phase 0 + Phase 1 + Phases 2-5 produced these commits on +`chore/restructure-and-docs` (newest first): + +- `docs(claude.md): add per-folder sub-CLAUDE.md stubs` — five `CLAUDE.md` files at directory boundaries. +- `docs(claude.md): rewrite with structured guidance + auto-managed sections` — the new CLAUDE.md. +- `chore(tooling): add CLAUDE.md auto-updater + pre-commit hook`. +- `docs(env): expand .env.example to match what the code actually reads`. +- `chore(restructure): Dockerfile CMD points at openshorts.app:app`. +- `chore(restructure): add openshorts/video/ffmpeg.py wrapper scaffold`. +- `chore(restructure): add openshorts/app.py re-export for Docker entrypoint`. +- `chore(restructure): move saasshorts -> openshorts/saas/pipeline.py`. +- `chore(restructure): split main.py -> video/* + ml/* + ingest/youtube.py`. +- `chore(restructure): split thumbnail -> thumbnails/{titles,images,descriptions}.py`. 
+- `chore(restructure): split editor -> editing/ai_filters + editing/prompts + utils/filters`. +- `chore(restructure): split subtitles -> overlays/subtitles_{generate,render}.py`. +- `chore(restructure): move hooks -> openshorts/overlays/hooks.py`. +- `chore(restructure): move translate -> openshorts/integrations/elevenlabs.py`. +- `chore(restructure): move s3_uploader -> openshorts/integrations/s3.py`. +- `chore(restructure): scaffold empty openshorts/ package + extend pyproject`. +- `test: add Phase 0 safety net before restructure`. + +The revert point is the git tag `pre-restructure-20260519-1526`. `git reset --hard +pre-restructure-20260519-1526` returns the tree to its pre-restructure state. From 55f0ef155c55da17cdaa8d69dd84675e6c533227 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Tue, 19 May 2026 16:33:38 -0400 Subject: [PATCH 19/43] chore(restructure): split repo into backend/ + frontend/ + renderer/ + assets/ Top-level cleanup per fastapi/full-stack-fastapi-template conventions. The root is now a clear monorepo with three deployable services and no loose Python files. Layout: backend/ Python FastAPI (was: openshorts/ + root .py monoliths) frontend/ React + Vite dashboard (renamed from dashboard/) renderer/ Remotion service + compositions (was: render-service/ + remotion/) assets/ Committed fonts + screenshots (was: fonts/ + screenshots/) scripts/ Dev tooling (unchanged) Highlights: - openshorts/ Python package renamed to backend/app/ to match FastAPI template convention (uvicorn app.main:app). - Root app.py (the 2256-line FastAPI monolith) moved to backend/app/main.py; its shim imports now point at app.integrations.s3, app.editing.ai_filters, app.overlays.subtitles_*, app.thumbnails.*, app.saas.pipeline, etc. - Root main.py CLI moved to backend/app/cli.py with rewritten imports. 
- All root .py shims deleted (editor.py, hooks.py, subtitles.py, translate.py, s3_uploader.py, thumbnail.py, saasshorts.py) plus the three verify_*.py scripts that the test suite replaced. - backend/Dockerfile uses uvicorn app.main:app entrypoint. - renderer/service/Dockerfile updated for new compositions/ path. - docker-compose.yml updated: backend builds ./backend, frontend builds ./frontend, renderer builds renderer/service/Dockerfile. - Font path now auto-resolves walking up from hooks.py until it finds assets/fonts/, so tests work whether run from repo root or backend/. - scripts/update_claude_md.py: PACKAGE_ROOT -> backend/app, repo-map descriptions updated for new top-level layout. - CLAUDE.md: hand-written sections rewritten for new paths; auto-managed REPO-MAP/MODULE-MAP/ENV sections regenerated by the updater. - .gitignore: snapshot/fixture paths repointed under backend/tests/. Tests: 62/62 green (pytest -m "not e2e" from backend/). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- .gitignore | 4 +- CLAUDE.md | 176 ++++++++++-------- {fonts => assets/fonts}/NotoSerif-Bold.ttf | Bin .../screenshots}/ai-shorts.png | Bin .../screenshots}/clip-results.png | Bin .../screenshots}/ugc-gallery.png | Bin .../screenshots}/youtube-studio.png | Bin Dockerfile => backend/Dockerfile | 6 +- {openshorts => backend/app}/__init__.py | 0 {openshorts => backend/app}/audio/CLAUDE.md | 0 {openshorts => backend/app}/audio/__init__.py | 0 main.py => backend/app/cli.py | 16 +- {openshorts => backend/app}/core/__init__.py | 0 .../app}/editing/__init__.py | 0 .../app}/editing/ai_filters.py | 4 +- .../app}/editing/prompts.py | 0 .../app}/ingest/__init__.py | 0 {openshorts => backend/app}/ingest/youtube.py | 0 .../app}/integrations/__init__.py | 0 .../app}/integrations/elevenlabs.py | 0 .../app}/integrations/s3.py | 0 {openshorts => backend/app}/layouts/CLAUDE.md | 0 .../app}/layouts/__init__.py | 0 app.py => backend/app/main.py | 24 ++- {openshorts => 
backend/app}/ml/__init__.py | 0 {openshorts => backend/app}/ml/detection.py | 0 .../app}/ml/transcription.py | 0 .../app}/ml/viral_extraction.py | 0 .../app}/models/__init__.py | 0 .../app}/motion_graphics/CLAUDE.md | 0 .../app}/motion_graphics/__init__.py | 0 .../app}/motion_graphics/library/__init__.py | 0 .../app}/overlays/__init__.py | 0 {openshorts => backend/app}/overlays/hooks.py | 20 +- .../app}/overlays/subtitles_generate.py | 0 .../app}/overlays/subtitles_render.py | 0 {openshorts => backend/app}/prompts/CLAUDE.md | 0 .../app}/prompts/__init__.py | 0 .../app}/routes/__init__.py | 0 {openshorts => backend/app}/saas/__init__.py | 0 {openshorts => backend/app}/saas/pipeline.py | 0 .../app}/thumbnails/__init__.py | 0 .../app}/thumbnails/descriptions.py | 0 .../app}/thumbnails/images.py | 0 .../app}/thumbnails/titles.py | 2 +- {openshorts => backend/app}/utils/__init__.py | 0 {openshorts => backend/app}/utils/filters.py | 0 {openshorts => backend/app}/video/CLAUDE.md | 0 {openshorts => backend/app}/video/__init__.py | 0 {openshorts => backend/app}/video/ffmpeg.py | 0 {openshorts => backend/app}/video/pipeline.py | 8 +- .../app}/video/reframing.py | 0 .../app}/video/scene_analysis.py | 2 +- {openshorts => backend/app}/video/tracking.py | 0 pyproject.toml => backend/pyproject.toml | 4 +- .../requirements-dev.txt | 0 requirements.txt => backend/requirements.txt | 0 {tests => backend/tests}/__init__.py | 0 {tests => backend/tests}/api/__init__.py | 0 .../tests}/api/test_openapi_contract.py | 4 +- {tests => backend/tests}/conftest.py | 8 +- {tests => backend/tests}/e2e/__init__.py | 0 .../tests}/e2e/test_pipeline_smoke.py | 0 {tests => backend/tests}/fixtures/README.md | 0 {tests => backend/tests}/snapshots/.gitkeep | 0 .../tests}/snapshots/baseline.openapi.json | 0 {tests => backend/tests}/unit/__init__.py | 0 .../tests}/unit/test_filter_sanitization.py | 2 +- .../tests}/unit/test_hook_image.py | 2 +- .../tests}/unit/test_srt_generation.py | 3 +- 
.../tests}/unit/test_tracking.py | 2 +- .../tests}/unit/test_translate_languages.py | 2 +- docker-compose.yml | 12 +- editor.py | 11 -- {dashboard => frontend}/.gitignore | 0 {dashboard => frontend}/Dockerfile | 0 {dashboard => frontend}/README.md | 0 {dashboard => frontend}/eslint.config.js | 0 {dashboard => frontend}/index.html | 0 {dashboard => frontend}/package-lock.json | 0 {dashboard => frontend}/package.json | 0 {dashboard => frontend}/postcss.config.js | 0 .../public/logo-openshorts.png | Bin {dashboard => frontend}/public/og-image.png | Bin {dashboard => frontend}/public/robots.txt | 0 {dashboard => frontend}/public/sitemap.xml | 0 {dashboard => frontend}/public/vite.svg | 0 {dashboard => frontend}/src/App.css | 0 {dashboard => frontend}/src/App.jsx | 0 {dashboard => frontend}/src/Landing.jsx | 0 {dashboard => frontend}/src/Legal.jsx | 0 {dashboard => frontend}/src/assets/react.svg | 0 .../src/components/Gallery.jsx | 0 .../src/components/GalleryCard.jsx | 0 .../src/components/HookModal.jsx | 0 .../src/components/KeyInput.jsx | 0 .../src/components/MediaInput.jsx | 0 .../src/components/ProcessingAnimation.jsx | 0 .../src/components/RemotionPreview.jsx | 0 .../src/components/ResultCard.jsx | 0 .../src/components/SaaShortsTab.jsx | 0 .../src/components/ScheduleWeekModal.jsx | 0 .../src/components/SubtitleModal.jsx | 0 .../src/components/ThumbnailStudio.jsx | 0 .../src/components/TranslateModal.jsx | 0 .../src/components/UGCGallery.jsx | 0 {dashboard => frontend}/src/config.js | 0 {dashboard => frontend}/src/index.css | 0 .../src/lib/renderInBrowser.js | 0 {dashboard => frontend}/src/main.jsx | 0 .../src/remotion/compositions/HookOverlay.tsx | 0 .../src/remotion/compositions/ShortVideo.tsx | 0 .../src/remotion/compositions/Subtitles.tsx | 0 .../remotion/compositions/VideoEffects.tsx | 0 .../src/remotion/lib/captions.ts | 0 .../src/remotion/lib/fonts.ts | 0 .../src/remotion/lib/types.ts | 0 {dashboard => frontend}/tailwind.config.js | 0 {dashboard => 
frontend}/vite.config.js | 0 hooks.py | 15 -- openshorts/app.py | 23 --- .../compositions}/.gitignore | 0 .../compositions}/package.json | 0 .../public/fonts/NotoSerif-Bold.ttf | Bin .../compositions}/src/Root.tsx | 0 .../src/compositions/HookOverlay.tsx | 0 .../src/compositions/ShortVideo.tsx | 0 .../src/compositions/Subtitles.tsx | 0 .../src/compositions/VideoEffects.tsx | 0 .../compositions}/src/index.ts | 0 .../compositions}/src/lib/captions.ts | 0 .../compositions}/src/lib/fonts.ts | 0 .../compositions}/src/lib/types.ts | 0 .../compositions}/tsconfig.json | 0 .../service}/.gitignore | 0 .../service}/Dockerfile | 14 +- .../service}/package-lock.json | 0 .../service}/package.json | 0 .../service}/src/bundle.ts | 0 .../service}/src/render-worker.ts | 0 .../service}/src/server.ts | 0 .../service}/tsconfig.json | 0 s3_uploader.py | 19 -- saasshorts.py | 22 --- scripts/update_claude_md.py | 19 +- subtitles.py | 19 -- thumbnail.py | 16 -- translate.py | 16 -- verify_aesthetic.py | 37 ---- verify_custom_hook.py | 32 ---- verify_hooks.py | 36 ---- 151 files changed, 186 insertions(+), 394 deletions(-) rename {fonts => assets/fonts}/NotoSerif-Bold.ttf (100%) rename {screenshots => assets/screenshots}/ai-shorts.png (100%) rename {screenshots => assets/screenshots}/clip-results.png (100%) rename {screenshots => assets/screenshots}/ugc-gallery.png (100%) rename {screenshots => assets/screenshots}/youtube-studio.png (100%) rename Dockerfile => backend/Dockerfile (87%) rename {openshorts => backend/app}/__init__.py (100%) rename {openshorts => backend/app}/audio/CLAUDE.md (100%) rename {openshorts => backend/app}/audio/__init__.py (100%) rename main.py => backend/app/cli.py (92%) rename {openshorts => backend/app}/core/__init__.py (100%) rename {openshorts => backend/app}/editing/__init__.py (100%) rename {openshorts => backend/app}/editing/ai_filters.py (99%) rename {openshorts => backend/app}/editing/prompts.py (100%) rename {openshorts => backend/app}/ingest/__init__.py 
(100%) rename {openshorts => backend/app}/ingest/youtube.py (100%) rename {openshorts => backend/app}/integrations/__init__.py (100%) rename {openshorts => backend/app}/integrations/elevenlabs.py (100%) rename {openshorts => backend/app}/integrations/s3.py (100%) rename {openshorts => backend/app}/layouts/CLAUDE.md (100%) rename {openshorts => backend/app}/layouts/__init__.py (100%) rename app.py => backend/app/main.py (98%) rename {openshorts => backend/app}/ml/__init__.py (100%) rename {openshorts => backend/app}/ml/detection.py (100%) rename {openshorts => backend/app}/ml/transcription.py (100%) rename {openshorts => backend/app}/ml/viral_extraction.py (100%) rename {openshorts => backend/app}/models/__init__.py (100%) rename {openshorts => backend/app}/motion_graphics/CLAUDE.md (100%) rename {openshorts => backend/app}/motion_graphics/__init__.py (100%) rename {openshorts => backend/app}/motion_graphics/library/__init__.py (100%) rename {openshorts => backend/app}/overlays/__init__.py (100%) rename {openshorts => backend/app}/overlays/hooks.py (93%) rename {openshorts => backend/app}/overlays/subtitles_generate.py (100%) rename {openshorts => backend/app}/overlays/subtitles_render.py (100%) rename {openshorts => backend/app}/prompts/CLAUDE.md (100%) rename {openshorts => backend/app}/prompts/__init__.py (100%) rename {openshorts => backend/app}/routes/__init__.py (100%) rename {openshorts => backend/app}/saas/__init__.py (100%) rename {openshorts => backend/app}/saas/pipeline.py (100%) rename {openshorts => backend/app}/thumbnails/__init__.py (100%) rename {openshorts => backend/app}/thumbnails/descriptions.py (100%) rename {openshorts => backend/app}/thumbnails/images.py (100%) rename {openshorts => backend/app}/thumbnails/titles.py (99%) rename {openshorts => backend/app}/utils/__init__.py (100%) rename {openshorts => backend/app}/utils/filters.py (100%) rename {openshorts => backend/app}/video/CLAUDE.md (100%) rename {openshorts => 
backend/app}/video/__init__.py (100%) rename {openshorts => backend/app}/video/ffmpeg.py (100%) rename {openshorts => backend/app}/video/pipeline.py (96%) rename {openshorts => backend/app}/video/reframing.py (100%) rename {openshorts => backend/app}/video/scene_analysis.py (97%) rename {openshorts => backend/app}/video/tracking.py (100%) rename pyproject.toml => backend/pyproject.toml (86%) rename requirements-dev.txt => backend/requirements-dev.txt (100%) rename requirements.txt => backend/requirements.txt (100%) rename {tests => backend/tests}/__init__.py (100%) rename {tests => backend/tests}/api/__init__.py (100%) rename {tests => backend/tests}/api/test_openapi_contract.py (96%) rename {tests => backend/tests}/conftest.py (95%) rename {tests => backend/tests}/e2e/__init__.py (100%) rename {tests => backend/tests}/e2e/test_pipeline_smoke.py (100%) rename {tests => backend/tests}/fixtures/README.md (100%) rename {tests => backend/tests}/snapshots/.gitkeep (100%) rename {tests => backend/tests}/snapshots/baseline.openapi.json (100%) rename {tests => backend/tests}/unit/__init__.py (100%) rename {tests => backend/tests}/unit/test_filter_sanitization.py (98%) rename {tests => backend/tests}/unit/test_hook_image.py (98%) rename {tests => backend/tests}/unit/test_srt_generation.py (96%) rename {tests => backend/tests}/unit/test_tracking.py (98%) rename {tests => backend/tests}/unit/test_translate_languages.py (93%) delete mode 100644 editor.py rename {dashboard => frontend}/.gitignore (100%) rename {dashboard => frontend}/Dockerfile (100%) rename {dashboard => frontend}/README.md (100%) rename {dashboard => frontend}/eslint.config.js (100%) rename {dashboard => frontend}/index.html (100%) rename {dashboard => frontend}/package-lock.json (100%) rename {dashboard => frontend}/package.json (100%) rename {dashboard => frontend}/postcss.config.js (100%) rename {dashboard => frontend}/public/logo-openshorts.png (100%) rename {dashboard => frontend}/public/og-image.png 
(100%) rename {dashboard => frontend}/public/robots.txt (100%) rename {dashboard => frontend}/public/sitemap.xml (100%) rename {dashboard => frontend}/public/vite.svg (100%) rename {dashboard => frontend}/src/App.css (100%) rename {dashboard => frontend}/src/App.jsx (100%) rename {dashboard => frontend}/src/Landing.jsx (100%) rename {dashboard => frontend}/src/Legal.jsx (100%) rename {dashboard => frontend}/src/assets/react.svg (100%) rename {dashboard => frontend}/src/components/Gallery.jsx (100%) rename {dashboard => frontend}/src/components/GalleryCard.jsx (100%) rename {dashboard => frontend}/src/components/HookModal.jsx (100%) rename {dashboard => frontend}/src/components/KeyInput.jsx (100%) rename {dashboard => frontend}/src/components/MediaInput.jsx (100%) rename {dashboard => frontend}/src/components/ProcessingAnimation.jsx (100%) rename {dashboard => frontend}/src/components/RemotionPreview.jsx (100%) rename {dashboard => frontend}/src/components/ResultCard.jsx (100%) rename {dashboard => frontend}/src/components/SaaShortsTab.jsx (100%) rename {dashboard => frontend}/src/components/ScheduleWeekModal.jsx (100%) rename {dashboard => frontend}/src/components/SubtitleModal.jsx (100%) rename {dashboard => frontend}/src/components/ThumbnailStudio.jsx (100%) rename {dashboard => frontend}/src/components/TranslateModal.jsx (100%) rename {dashboard => frontend}/src/components/UGCGallery.jsx (100%) rename {dashboard => frontend}/src/config.js (100%) rename {dashboard => frontend}/src/index.css (100%) rename {dashboard => frontend}/src/lib/renderInBrowser.js (100%) rename {dashboard => frontend}/src/main.jsx (100%) rename {dashboard => frontend}/src/remotion/compositions/HookOverlay.tsx (100%) rename {dashboard => frontend}/src/remotion/compositions/ShortVideo.tsx (100%) rename {dashboard => frontend}/src/remotion/compositions/Subtitles.tsx (100%) rename {dashboard => frontend}/src/remotion/compositions/VideoEffects.tsx (100%) rename {dashboard => 
frontend}/src/remotion/lib/captions.ts (100%) rename {dashboard => frontend}/src/remotion/lib/fonts.ts (100%) rename {dashboard => frontend}/src/remotion/lib/types.ts (100%) rename {dashboard => frontend}/tailwind.config.js (100%) rename {dashboard => frontend}/vite.config.js (100%) delete mode 100644 hooks.py delete mode 100644 openshorts/app.py rename {remotion => renderer/compositions}/.gitignore (100%) rename {remotion => renderer/compositions}/package.json (100%) rename {remotion => renderer/compositions}/public/fonts/NotoSerif-Bold.ttf (100%) rename {remotion => renderer/compositions}/src/Root.tsx (100%) rename {remotion => renderer/compositions}/src/compositions/HookOverlay.tsx (100%) rename {remotion => renderer/compositions}/src/compositions/ShortVideo.tsx (100%) rename {remotion => renderer/compositions}/src/compositions/Subtitles.tsx (100%) rename {remotion => renderer/compositions}/src/compositions/VideoEffects.tsx (100%) rename {remotion => renderer/compositions}/src/index.ts (100%) rename {remotion => renderer/compositions}/src/lib/captions.ts (100%) rename {remotion => renderer/compositions}/src/lib/fonts.ts (100%) rename {remotion => renderer/compositions}/src/lib/types.ts (100%) rename {remotion => renderer/compositions}/tsconfig.json (100%) rename {render-service => renderer/service}/.gitignore (100%) rename {render-service => renderer/service}/Dockerfile (61%) rename {render-service => renderer/service}/package-lock.json (100%) rename {render-service => renderer/service}/package.json (100%) rename {render-service => renderer/service}/src/bundle.ts (100%) rename {render-service => renderer/service}/src/render-worker.ts (100%) rename {render-service => renderer/service}/src/server.ts (100%) rename {render-service => renderer/service}/tsconfig.json (100%) delete mode 100644 s3_uploader.py delete mode 100644 saasshorts.py delete mode 100644 subtitles.py delete mode 100644 thumbnail.py delete mode 100644 translate.py delete mode 100644 
verify_aesthetic.py delete mode 100644 verify_custom_hook.py delete mode 100644 verify_hooks.py diff --git a/.gitignore b/.gitignore index 57f2857b..c7c6eeaf 100644 --- a/.gitignore +++ b/.gitignore @@ -41,8 +41,8 @@ output/ .pytest_cache/ # Test ephemera (baseline.openapi.json IS committed; current is not) -tests/snapshots/current.openapi.json -tests/fixtures/smoke.mp4 +backend/tests/snapshots/current.openapi.json +backend/tests/fixtures/smoke.mp4 # Multi-agent Skills .agents/ .agent/ diff --git a/CLAUDE.md b/CLAUDE.md index a1e7ffa8..a4c695f3 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -11,20 +11,38 @@ title generation, faster-whisper for transcription, PySceneDetect for scene boundaries, MediaPipe + YOLOv8 for face/person tracking, and FFmpeg for all encoding/overlay/mux work. +## Top-level layout + +``` +openshorts/ +├── backend/ # 🐍 Python FastAPI — API, video pipeline, tests +├── frontend/ # ⚛️ React + Vite — the dashboard UI +├── renderer/ # 🎬 Remotion service (TypeScript) + compositions +├── assets/ # 🖼️ Committed static files (fonts, screenshots) +├── scripts/ # 🛠️ Dev tooling (CLAUDE.md auto-updater, hook installer) +└── docker-compose.yml +``` + +Each top-level folder is self-contained: `backend/` has its own `Dockerfile` and Python deps, `frontend/` has its own `package.json`, `renderer/` bundles its own TypeScript. Docker Compose orchestrates all three. + ## Quick start ```bash -# Full stack (backend + frontend + Remotion renderer) +# Full stack (recommended) docker compose up --build +# Frontend → http://localhost:5175 +# Backend → http://localhost:8000 +# Renderer → http://localhost:3100 # Backend only (local dev — needs Python 3.11+ and FFmpeg on PATH) +cd backend pip install -r requirements.txt -r requirements-dev.txt pip install -e . 
pytest -m "not e2e" # unit + API contract suite (~0.6s) -uvicorn openshorts.app:app --host 0.0.0.0 --port 8000 +uvicorn app.main:app --host 0.0.0.0 --port 8000 # Frontend only -cd dashboard && npm install && npm run dev +cd frontend && npm install && npm run dev ``` Install the CLAUDE.md auto-updater hook once after cloning: @@ -39,23 +57,25 @@ When you want to **add** something, this is where it lands: | If you want to add… | Drop it in… | Notes | | --- | --- | --- | -| A new HTTP endpoint | `openshorts/routes/<domain>.py` + register in `openshorts/app.py` | The router split from `app.py` is in flight; until it ships, edit `app.py` directly. | -| A new FFmpeg operation | `openshorts/video/ffmpeg.py` | Never call `subprocess.run(['ffmpeg', ...])` outside this module. | -| A new external service client | `openshorts/integrations/<service>.py` | Each one exposes a typed Python client. | -| A new AI model / inference call | `openshorts/ml/<purpose>.py` | Detection, transcription, viral extraction, etc. | -| A new layout template | `openshorts/layouts/<name>.py` | Subclass `Layout` (see [ROADMAP.md](ROADMAP.md) feature B). | -| A new motion-graphic effect | `openshorts/motion_graphics/library/<name>.py` | Subclass `MotionGraphicEffect` (see [ROADMAP.md](ROADMAP.md) feature C). | -| A new audio mixer / SFX | `openshorts/audio/<concern>.py` | See [ROADMAP.md](ROADMAP.md) feature A. | -| A new Gemini prompt | `openshorts/prompts/<name>.md` or `openshorts/editing/prompts.py` | Externalize prompts; don't bury them in handler code. | -| A new Pydantic schema | `openshorts/models/<domain>.py` | One file per request/response domain. | -| A new shared FFmpeg / filter helper | `openshorts/utils/filters.py` | Already used by editing + future motion-graphics compositor. | -| A new core infrastructure piece | `openshorts/core/<concern>.py` | Job queue, job store, api-key resolver, logging. 
| +| A new HTTP endpoint | `backend/app/routes/<domain>.py` + register in `backend/app/main.py` | The full router split from `main.py` is in flight; until it ships, edit `backend/app/main.py` directly. | +| A new FFmpeg operation | `backend/app/video/ffmpeg.py` | Never call `subprocess.run(['ffmpeg', ...])` outside this module. | +| A new external service client | `backend/app/integrations/<service>.py` | Each one exposes a typed Python client. | +| A new AI model / inference call | `backend/app/ml/<purpose>.py` | Detection, transcription, viral extraction, etc. | +| A new layout template | `backend/app/layouts/<name>.py` | Subclass `Layout` (see [ROADMAP.md](ROADMAP.md) feature B). | +| A new motion-graphic effect | `backend/app/motion_graphics/library/<name>.py` | Subclass `MotionGraphicEffect` (see [ROADMAP.md](ROADMAP.md) feature C). | +| A new audio mixer / SFX | `backend/app/audio/<concern>.py` | See [ROADMAP.md](ROADMAP.md) feature A. | +| A new Gemini prompt | `backend/app/prompts/<name>.md` or `backend/app/editing/prompts.py` | Externalize prompts; don't bury them in handler code. | +| A new Pydantic schema | `backend/app/models/<domain>.py` | One file per request/response domain. | +| A new shared FFmpeg / filter helper | `backend/app/utils/filters.py` | Already used by editing + future motion-graphics compositor. | +| A new core infrastructure piece | `backend/app/core/<concern>.py` | Job queue, job store, api-key resolver, logging. | +| A new frontend page / component | `frontend/src/components/<Name>.jsx` | Match existing modal/card naming. | +| A new Remotion composition | `renderer/compositions/src/` | Service auto-bundles compositions in this folder. | When you want to **remove** something: 1. Delete the route file (or the function within it). -2. `grep -r <removed_name>` to find dead imports. -3. Delete the corresponding Pydantic model in `openshorts/models/` if any. +2. `grep -rn <removed_name> backend/` to find dead imports. +3. 
Delete the corresponding Pydantic model in `backend/app/models/` if any. 4. Delete or update tests that reference it. 5. Run `python scripts/update_claude_md.py` (the pre-commit hook will do this for you). @@ -66,86 +86,84 @@ The top-level folders. **The table below is auto-managed by `scripts/update_clau <!-- AUTO:REPO-MAP:START --> | Folder | What it is | | --- | --- | -| `dashboard/` | React + Vite frontend (out of scope for the current restructure). | -| `fonts/` | Committed TTFs (Noto Serif Bold) used by hook overlays. | -| `openshorts/` | Python package — all backend code lives here. | +| `assets/` | Committed static assets (fonts, screenshots). | +| `backend/` | Python FastAPI service — the API, video pipeline, and tests. | +| `frontend/` | React + Vite dashboard — the UI users interact with. | | `output/` | Runtime: generated clips and thumbnails (gitignored). | -| `remotion/` | Remotion compositions (TypeScript) consumed by the render-service. | -| `render-service/` | Standalone TypeScript microservice that bundles + renders Remotion compositions. | -| `screenshots/` | Repo screenshots used in README.md. | +| `renderer/` | Remotion render microservice (TypeScript) + compositions. | | `scripts/` | Developer tooling (update_claude_md.py, install_hooks.sh). | -| `tests/` | Pytest suite — unit, API contract, and e2e smoke. | | `uploads/` | Runtime: incoming video uploads (gitignored). | <!-- AUTO:REPO-MAP:END --> -### Backend package (`openshorts/`) +### Backend package (`backend/app/`) The Python package follows classical layered conventions. Each subfolder has a one-line purpose statement in its `__init__.py`. | Folder | Rule | | --- | --- | -| `openshorts/core/` | Cross-cutting infra: job queue, job store, API-key resolver, logging. | -| `openshorts/routes/` | FastAPI routers, one module per API domain. | -| `openshorts/video/` | All video work goes here. 
**FFmpeg only via `video/ffmpeg.py`.** | -| `openshorts/ml/` | AI inference: face/person detection, transcription, viral extraction. | -| `openshorts/audio/` | Future feature A — soundtracks + ducking. | -| `openshorts/layouts/` | Future feature B — layout templates (panorama, educational, etc.). | -| `openshorts/motion_graphics/` | Future feature C — animated overlays + multi-effect compositor. | -| `openshorts/editing/` | AI-generated FFmpeg filter pipeline. | -| `openshorts/overlays/` | Hook cards + subtitle generation / burn-in. | -| `openshorts/ingest/` | YouTube downloads + local upload handling. | -| `openshorts/saas/` | SaaSShorts UGC pipeline (research → script → media → composite). | -| `openshorts/integrations/` | External-service clients (S3, ElevenLabs, fal.ai, Upload-Post). | -| `openshorts/thumbnails/` | YouTube thumbnail workflow (titles, images, descriptions). | -| `openshorts/prompts/` | Externalized Gemini prompt templates. | -| `openshorts/models/` | Pydantic request/response schemas grouped by domain. | -| `openshorts/utils/` | Shared helpers: filter sanitization, path utilities. | +| `backend/app/core/` | Cross-cutting infra: job queue, job store, API-key resolver, logging. | +| `backend/app/routes/` | FastAPI routers, one module per API domain. | +| `backend/app/video/` | All video work goes here. **FFmpeg only via `video/ffmpeg.py`.** | +| `backend/app/ml/` | AI inference: face/person detection, transcription, viral extraction. | +| `backend/app/audio/` | Future feature A — soundtracks + ducking. | +| `backend/app/layouts/` | Future feature B — layout templates (panorama, educational, etc.). | +| `backend/app/motion_graphics/` | Future feature C — animated overlays + multi-effect compositor. | +| `backend/app/editing/` | AI-generated FFmpeg filter pipeline. | +| `backend/app/overlays/` | Hook cards + subtitle generation / burn-in. | +| `backend/app/ingest/` | YouTube downloads + local upload handling. 
| +| `backend/app/saas/` | SaaSShorts UGC pipeline (research → script → media → composite). | +| `backend/app/integrations/` | External-service clients (S3, ElevenLabs, fal.ai, Upload-Post). | +| `backend/app/thumbnails/` | YouTube thumbnail workflow (titles, images, descriptions). | +| `backend/app/prompts/` | Externalized Gemini prompt templates. | +| `backend/app/models/` | Pydantic request/response schemas grouped by domain. | +| `backend/app/utils/` | Shared helpers: filter sanitization, path utilities. | ## Module map -Every Python module under `openshorts/` and its public surface. **Auto-managed** — regenerated by the pre-commit hook from each file's docstring. +Every Python module under `backend/app/` and its public surface. **Auto-managed** — regenerated by the pre-commit hook from each file's docstring. <!-- AUTO:MODULE-MAP:START --> | Module | Purpose | Public surface | | --- | --- | --- | -| `openshorts/app.py` | FastAPI application entrypoint for the openshorts package. | _(none)_ | -| `openshorts/editing/ai_filters.py` | VideoEditor: Gemini-driven FFmpeg filter generation and application. | `VideoEditor` | -| `openshorts/editing/prompts.py` | Gemini prompt templates for AI video-effect generation. | `build_ffmpeg_filter_prompt`, `build_effects_config_prompt` | -| `openshorts/ingest/youtube.py` | YouTube downloader with bot-detection workarounds (yt-dlp + cookies + alt clients). | `sanitize_filename`, `download_youtube_video` | -| `openshorts/integrations/elevenlabs.py` | ElevenLabs Dubbing API client: AI voice translation across 30+ languages. | `create_dubbing_project`, `get_dubbing_status`, `download_dubbed_video`, `translate_video`, `get_supported_languages` | -| `openshorts/integrations/s3.py` | AWS S3 client: clip uploads, actor gallery, UGC video gallery, presigned URLs. 
| `upload_file_to_s3`, `get_s3_client`, `generate_presigned_url`, `list_all_clips`, `upload_actor_to_s3`, `list_actor_gallery`, `upload_video_to_gallery`, `list_video_gallery`, `upload_job_artifacts` | -| `openshorts/ml/detection.py` | Face and person detection: MediaPipe BlazeFace (primary) + YOLOv8 (fallback). | `detect_face_candidates`, `detect_person_yolo` | -| `openshorts/ml/transcription.py` | faster-whisper transcription: CPU-optimized (INT8 quantization) with word timestamps. | `transcribe_video` | -| `openshorts/ml/viral_extraction.py` | Gemini 2.5 Flash viral-moment extraction: picks 3-15 short clips from a transcript. | `get_viral_clips` | -| `openshorts/overlays/hooks.py` | Hook text overlays: PIL-rendered cards (PNG) burned onto video via FFmpeg. | `download_font_if_needed`, `create_hook_image`, `add_hook_to_video` | -| `openshorts/overlays/subtitles_generate.py` | SRT subtitle generation: transcription and word-level grouping into short lines. | `transcribe_audio`, `generate_srt_from_video`, `generate_srt`, `format_srt_block` | -| `openshorts/overlays/subtitles_render.py` | Subtitle burn-in: FFmpeg subtitles filter + ASS color/style conversion. | `hex_to_ass_color`, `burn_subtitles` | -| `openshorts/saas/pipeline.py` | SaaSShorts: AI-powered UGC video generator for SaaS products. | `research_saas_online`, `scrape_website`, `analyze_saas`, `generate_scripts`, `generate_actor_images`, `generate_actor_image`, `generate_voiceover`, `get_elevenlabs_voices`, `generate_talking_head`, `generate_talking_head_lowcost`, `generate_broll`, `transcribe_audio_for_subs`, `generate_tiktok_subs`, `generate_srt_from_script`, `composite_video`, `generate_full_video` | -| `openshorts/thumbnails/descriptions.py` | YouTube description + chapter-marker generation from transcript segments. | `generate_youtube_description` | -| `openshorts/thumbnails/images.py` | Thumbnail image generation via Gemini multimodal image preview model. 
| `generate_thumbnail` | -| `openshorts/thumbnails/titles.py` | Gemini-driven viral title generation and conversational refinement. | `analyze_video_for_titles`, `refine_titles` | -| `openshorts/utils/filters.py` | Shared FFmpeg filter helpers: chain splitting, sanitization, zoompan size enforcement. | `split_filter_chain`, `enforce_zoompan_output_size`, `sanitize_filter_string` | -| `openshorts/video/ffmpeg.py` | Single FFmpeg wrapper for the entire codebase. | `FFmpegError`, `run`, `probe_resolution`, `probe_duration`, `cut`, `extract_audio`, `mux_video_audio`, `overlay_png`, `build_filter_complex` | -| `openshorts/video/pipeline.py` | process_video_to_vertical orchestrator: scenes -> strategy -> per-frame crop -> mux. | `process_video_to_vertical` | -| `openshorts/video/reframing.py` | Vertical reframing helpers: blurred-background 'General Shot' composite. | `create_general_frame` | -| `openshorts/video/scene_analysis.py` | PySceneDetect scene boundaries + per-scene TRACK/GENERAL strategy analysis. | `detect_scenes`, `get_video_resolution`, `analyze_scenes_strategy` | -| `openshorts/video/tracking.py` | SmoothedCameraman and SpeakerTracker: the heart of stabilized vertical reframing. | `SmoothedCameraman`, `SpeakerTracker` | +| `backend/app/cli.py` | Compat shim + CLI entrypoint. | _(none)_ | +| `backend/app/editing/ai_filters.py` | VideoEditor: Gemini-driven FFmpeg filter generation and application. | `VideoEditor` | +| `backend/app/editing/prompts.py` | Gemini prompt templates for AI video-effect generation. | `build_ffmpeg_filter_prompt`, `build_effects_config_prompt` | +| `backend/app/ingest/youtube.py` | YouTube downloader with bot-detection workarounds (yt-dlp + cookies + alt clients). | `sanitize_filename`, `download_youtube_video` | +| `backend/app/integrations/elevenlabs.py` | ElevenLabs Dubbing API client: AI voice translation across 30+ languages. 
| `create_dubbing_project`, `get_dubbing_status`, `download_dubbed_video`, `translate_video`, `get_supported_languages` | +| `backend/app/integrations/s3.py` | AWS S3 client: clip uploads, actor gallery, UGC video gallery, presigned URLs. | `upload_file_to_s3`, `get_s3_client`, `generate_presigned_url`, `list_all_clips`, `upload_actor_to_s3`, `list_actor_gallery`, `upload_video_to_gallery`, `list_video_gallery`, `upload_job_artifacts` | +| `backend/app/main.py` | FastAPI application entrypoint: routes, job queue, and the wire-up of every backend feature. | `cleanup_jobs`, `process_queue`, `run_job_wrapper`, `lifespan`, `ProcessRequest`, `enqueue_output`, `run_job`, `get_config`, `process_endpoint`, `get_status`, `EditRequest`, `edit_clip`, `SubtitleRequest`, `get_clip_transcript`, `proxy_render`, `proxy_render_status`, `EffectsGenerateRequest`, `generate_effects_config`, `add_subtitles`, `HookRequest`, `add_hook`, `TranslateRequest`, `get_languages`, `translate_clip`, `SocialPostRequest`, `post_to_socials`, `get_social_user`, `thumbnail_upload`, `thumbnail_analyze`, `ThumbnailTitlesRequest`, `thumbnail_titles`, `thumbnail_generate`, `ThumbnailDescribeRequest`, `thumbnail_describe`, `thumbnail_publish`, `thumbnail_publish_status`, `SaaSAnalyzeRequest`, `saasshorts_analyze`, `SaaSActorRequest`, `saasshorts_actor_upload`, `saasshorts_actor_options`, `saasshorts_video_gallery`, `SaaSPostRequest`, `saasshorts_post_to_socials`, `gallery_html_page`, `video_html_page`, `saasshorts_actor_gallery`, `SaaSGenerateRequest`, `saasshorts_generate`, `saasshorts_status`, `saasshorts_voices` | +| `backend/app/ml/detection.py` | Face and person detection: MediaPipe BlazeFace (primary) + YOLOv8 (fallback). | `detect_face_candidates`, `detect_person_yolo` | +| `backend/app/ml/transcription.py` | faster-whisper transcription: CPU-optimized (INT8 quantization) with word timestamps. 
| `transcribe_video` | +| `backend/app/ml/viral_extraction.py` | Gemini 2.5 Flash viral-moment extraction: picks 3-15 short clips from a transcript. | `get_viral_clips` | +| `backend/app/overlays/hooks.py` | Hook text overlays: PIL-rendered cards (PNG) burned onto video via FFmpeg. | `download_font_if_needed`, `create_hook_image`, `add_hook_to_video` | +| `backend/app/overlays/subtitles_generate.py` | SRT subtitle generation: transcription and word-level grouping into short lines. | `transcribe_audio`, `generate_srt_from_video`, `generate_srt`, `format_srt_block` | +| `backend/app/overlays/subtitles_render.py` | Subtitle burn-in: FFmpeg subtitles filter + ASS color/style conversion. | `hex_to_ass_color`, `burn_subtitles` | +| `backend/app/saas/pipeline.py` | SaaSShorts: AI-powered UGC video generator for SaaS products. | `research_saas_online`, `scrape_website`, `analyze_saas`, `generate_scripts`, `generate_actor_images`, `generate_actor_image`, `generate_voiceover`, `get_elevenlabs_voices`, `generate_talking_head`, `generate_talking_head_lowcost`, `generate_broll`, `transcribe_audio_for_subs`, `generate_tiktok_subs`, `generate_srt_from_script`, `composite_video`, `generate_full_video` | +| `backend/app/thumbnails/descriptions.py` | YouTube description + chapter-marker generation from transcript segments. | `generate_youtube_description` | +| `backend/app/thumbnails/images.py` | Thumbnail image generation via Gemini multimodal image preview model. | `generate_thumbnail` | +| `backend/app/thumbnails/titles.py` | Gemini-driven viral title generation and conversational refinement. | `analyze_video_for_titles`, `refine_titles` | +| `backend/app/utils/filters.py` | Shared FFmpeg filter helpers: chain splitting, sanitization, zoompan size enforcement. | `split_filter_chain`, `enforce_zoompan_output_size`, `sanitize_filter_string` | +| `backend/app/video/ffmpeg.py` | Single FFmpeg wrapper for the entire codebase. 
| `FFmpegError`, `run`, `probe_resolution`, `probe_duration`, `cut`, `extract_audio`, `mux_video_audio`, `overlay_png`, `build_filter_complex` | +| `backend/app/video/pipeline.py` | process_video_to_vertical orchestrator: scenes -> strategy -> per-frame crop -> mux. | `process_video_to_vertical` | +| `backend/app/video/reframing.py` | Vertical reframing helpers: blurred-background 'General Shot' composite. | `create_general_frame` | +| `backend/app/video/scene_analysis.py` | PySceneDetect scene boundaries + per-scene TRACK/GENERAL strategy analysis. | `detect_scenes`, `get_video_resolution`, `analyze_scenes_strategy` | +| `backend/app/video/tracking.py` | SmoothedCameraman and SpeakerTracker: the heart of stabilized vertical reframing. | `SmoothedCameraman`, `SpeakerTracker` | <!-- AUTO:MODULE-MAP:END --> ## Processing pipeline -1. **Ingest** — `openshorts/ingest/youtube.py:download_youtube_video()` or a local upload. -2. **Transcribe** — `openshorts/ml/transcription.py:transcribe_video()` (faster-whisper, word timestamps). -3. **Scene-detect** — `openshorts/video/scene_analysis.py:detect_scenes()` (PySceneDetect). -4. **Viral extraction** — `openshorts/ml/viral_extraction.py:get_viral_clips()` (Gemini 2.5 Flash picks 3–15 clips, 15–60 s each). +1. **Ingest** — `backend/app/ingest/youtube.py:download_youtube_video()` or a local upload. +2. **Transcribe** — `backend/app/ml/transcription.py:transcribe_video()` (faster-whisper, word timestamps). +3. **Scene-detect** — `backend/app/video/scene_analysis.py:detect_scenes()` (PySceneDetect). +4. **Viral extraction** — `backend/app/ml/viral_extraction.py:get_viral_clips()` (Gemini 2.5 Flash picks 3–15 clips, 15–60 s each). 5. **Cut clips** — FFmpeg `-ss`/`-to` per clip. -6. **Strategy** — `openshorts/video/scene_analysis.py:analyze_scenes_strategy()` decides TRACK vs GENERAL per scene. -7. **Reframe** — `openshorts/video/pipeline.py:process_video_to_vertical()` runs the per-frame loop. -8. 
**Effects** (optional) — `openshorts/editing/ai_filters.py:VideoEditor` injects Gemini-generated FFmpeg filters. -9. **Hooks + subtitles** (optional) — `openshorts/overlays/`. -10. **Translate** (optional) — `openshorts/integrations/elevenlabs.py:translate_video()` dubs into 30+ languages. -11. **Backup + distribute** — `openshorts/integrations/s3.py` + `openshorts/integrations/upload_post.py` (planned). +6. **Strategy** — `backend/app/video/scene_analysis.py:analyze_scenes_strategy()` decides TRACK vs GENERAL per scene. +7. **Reframe** — `backend/app/video/pipeline.py:process_video_to_vertical()` runs the per-frame loop. +8. **Effects** (optional) — `backend/app/editing/ai_filters.py:VideoEditor` injects Gemini-generated FFmpeg filters. +9. **Hooks + subtitles** (optional) — `backend/app/overlays/`. +10. **Translate** (optional) — `backend/app/integrations/elevenlabs.py:translate_video()` dubs into 30+ languages. +11. **Backup + distribute** — `backend/app/integrations/s3.py` + `backend/app/integrations/upload_post.py` (planned). ## API surface @@ -194,12 +212,12 @@ ElevenLabs / Upload-Post / fal.ai keys are **client-side** (encrypted in browser ## Conventions -1. **Single FFmpeg wrapper.** Every `subprocess.run(['ffmpeg', ...])` call should funnel through `openshorts/video/ffmpeg.py`. Migration of existing callers is incremental — but new code must use the wrapper. -2. **API keys via headers, not env.** Client-side keys (Gemini, ElevenLabs, Upload-Post, fal.ai) arrive on each request as `X-...-Key`. The resolver helper for these lives in `openshorts/core/api_keys.py` (planned). Do NOT call `request.headers.get('X-...')` outside that file. -3. **Prompts as files.** New Gemini prompts go in `openshorts/prompts/<name>.md` and are loaded by name. Editing-domain prompts may stay inline in `openshorts/editing/prompts.py`. -4. 
**Every module starts with a docstring.** The pre-commit hook (`scripts/update_claude_md.py`) fails the commit if any `.py` file under `openshorts/` lacks one. Use a single line — it becomes the row in the auto-managed module map. +1. **Single FFmpeg wrapper.** Every `subprocess.run(['ffmpeg', ...])` call should funnel through `backend/app/video/ffmpeg.py`. Migration of existing callers is incremental — but new code must use the wrapper. +2. **API keys via headers, not env.** Client-side keys (Gemini, ElevenLabs, Upload-Post, fal.ai) arrive on each request as `X-...-Key`. The resolver helper for these lives in `backend/app/core/api_keys.py` (planned). Do NOT call `request.headers.get('X-...')` outside that file. +3. **Prompts as files.** New Gemini prompts go in `backend/app/prompts/<name>.md` and are loaded by name. Editing-domain prompts may stay inline in `backend/app/editing/prompts.py`. +4. **Every module starts with a docstring.** The pre-commit hook (`scripts/update_claude_md.py`) fails the commit if any `.py` file under `backend/app/` lacks one. Use a single line — it becomes the row in the auto-managed module map. 5. **Tests first.** A characterization test suite (`tests/`) was written *before* the restructure. Anything that touches behavior should keep `pytest -m "not e2e"` 100% green. The OpenAPI snapshot in `tests/snapshots/baseline.openapi.json` pins the public API. -6. **No new global dicts in routers.** Job state goes through `openshorts/core/job_store.py` (planned). Today, `app.py` still owns these dicts — keep them centralized there until the routers are split out. +6. **No new global dicts in routers.** Job state goes through `backend/app/core/job_store.py` (planned). Today, `backend/app/main.py` still owns these dicts — keep them centralized there until the routers are split out. 
## Pointers @@ -207,7 +225,7 @@ ElevenLabs / Upload-Post / fal.ai keys are **client-side** (encrypted in browser - `scripts/update_claude_md.py` — what regenerates the auto-managed sections of this file. - `scripts/install_hooks.sh` — one-liner to wire up the pre-commit hook. - `tests/snapshots/baseline.openapi.json` — the contract that any backend change must keep green. -- `dashboard/` — the React/Vite frontend (deliberately out of scope for the current restructure). +- `frontend/` — the React/Vite frontend (deliberately out of scope for the current restructure). ## Tech stack diff --git a/fonts/NotoSerif-Bold.ttf b/assets/fonts/NotoSerif-Bold.ttf similarity index 100% rename from fonts/NotoSerif-Bold.ttf rename to assets/fonts/NotoSerif-Bold.ttf diff --git a/screenshots/ai-shorts.png b/assets/screenshots/ai-shorts.png similarity index 100% rename from screenshots/ai-shorts.png rename to assets/screenshots/ai-shorts.png diff --git a/screenshots/clip-results.png b/assets/screenshots/clip-results.png similarity index 100% rename from screenshots/clip-results.png rename to assets/screenshots/clip-results.png diff --git a/screenshots/ugc-gallery.png b/assets/screenshots/ugc-gallery.png similarity index 100% rename from screenshots/ugc-gallery.png rename to assets/screenshots/ugc-gallery.png diff --git a/screenshots/youtube-studio.png b/assets/screenshots/youtube-studio.png similarity index 100% rename from screenshots/youtube-studio.png rename to assets/screenshots/youtube-studio.png diff --git a/Dockerfile b/backend/Dockerfile similarity index 87% rename from Dockerfile rename to backend/Dockerfile index 498b6943..8910adf4 100644 --- a/Dockerfile +++ b/backend/Dockerfile @@ -60,7 +60,5 @@ RUN python -c "from ultralytics import YOLO; YOLO('yolov8n.pt')" # Expose FastAPI port EXPOSE 8000 -# Run FastAPI app -# openshorts.app re-exports the FastAPI instance from the root-level app.py -# (it inserts the repo root onto sys.path itself, no pip install -e needed). 
-CMD ["uvicorn", "openshorts.app:app", "--host", "0.0.0.0", "--port", "8000"] +# Run FastAPI app — the FastAPI instance lives at app.main:app +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/openshorts/__init__.py b/backend/app/__init__.py similarity index 100% rename from openshorts/__init__.py rename to backend/app/__init__.py diff --git a/openshorts/audio/CLAUDE.md b/backend/app/audio/CLAUDE.md similarity index 100% rename from openshorts/audio/CLAUDE.md rename to backend/app/audio/CLAUDE.md diff --git a/openshorts/audio/__init__.py b/backend/app/audio/__init__.py similarity index 100% rename from openshorts/audio/__init__.py rename to backend/app/audio/__init__.py diff --git a/main.py b/backend/app/cli.py similarity index 92% rename from main.py rename to backend/app/cli.py index 07b1db9b..aa3c4621 100644 --- a/main.py +++ b/backend/app/cli.py @@ -21,22 +21,22 @@ load_dotenv() # Re-exports (used by app.py, thumbnail.py, and existing tests) -from openshorts.video.tracking import ( # noqa: F401 +from app.video.tracking import ( # noqa: F401 ASPECT_RATIO, SmoothedCameraman, SpeakerTracker, ) -from openshorts.video.scene_analysis import ( # noqa: F401 +from app.video.scene_analysis import ( # noqa: F401 detect_scenes, get_video_resolution, analyze_scenes_strategy, ) -from openshorts.video.reframing import create_general_frame # noqa: F401 -from openshorts.video.pipeline import process_video_to_vertical # noqa: F401 -from openshorts.ml.detection import detect_face_candidates, detect_person_yolo # noqa: F401 -from openshorts.ml.transcription import transcribe_video # noqa: F401 -from openshorts.ml.viral_extraction import GEMINI_PROMPT_TEMPLATE, get_viral_clips # noqa: F401 -from openshorts.ingest.youtube import download_youtube_video, sanitize_filename # noqa: F401 +from app.video.reframing import create_general_frame # noqa: F401 +from app.video.pipeline import process_video_to_vertical # noqa: F401 +from app.ml.detection import 
detect_face_candidates, detect_person_yolo # noqa: F401 +from app.ml.transcription import transcribe_video # noqa: F401 +from app.ml.viral_extraction import GEMINI_PROMPT_TEMPLATE, get_viral_clips # noqa: F401 +from app.ingest.youtube import download_youtube_video, sanitize_filename # noqa: F401 def _cli(): diff --git a/openshorts/core/__init__.py b/backend/app/core/__init__.py similarity index 100% rename from openshorts/core/__init__.py rename to backend/app/core/__init__.py diff --git a/openshorts/editing/__init__.py b/backend/app/editing/__init__.py similarity index 100% rename from openshorts/editing/__init__.py rename to backend/app/editing/__init__.py diff --git a/openshorts/editing/ai_filters.py b/backend/app/editing/ai_filters.py similarity index 99% rename from openshorts/editing/ai_filters.py rename to backend/app/editing/ai_filters.py index 7199e265..045d2598 100644 --- a/openshorts/editing/ai_filters.py +++ b/backend/app/editing/ai_filters.py @@ -12,11 +12,11 @@ from google import genai from google.genai import types -from openshorts.editing.prompts import ( +from app.editing.prompts import ( build_ffmpeg_filter_prompt, build_effects_config_prompt, ) -from openshorts.utils.filters import ( +from app.utils.filters import ( split_filter_chain as _split_filter_chain_fn, enforce_zoompan_output_size as _enforce_zoompan_output_size_fn, sanitize_filter_string as _sanitize_filter_string_fn, diff --git a/openshorts/editing/prompts.py b/backend/app/editing/prompts.py similarity index 100% rename from openshorts/editing/prompts.py rename to backend/app/editing/prompts.py diff --git a/openshorts/ingest/__init__.py b/backend/app/ingest/__init__.py similarity index 100% rename from openshorts/ingest/__init__.py rename to backend/app/ingest/__init__.py diff --git a/openshorts/ingest/youtube.py b/backend/app/ingest/youtube.py similarity index 100% rename from openshorts/ingest/youtube.py rename to backend/app/ingest/youtube.py diff --git 
a/openshorts/integrations/__init__.py b/backend/app/integrations/__init__.py similarity index 100% rename from openshorts/integrations/__init__.py rename to backend/app/integrations/__init__.py diff --git a/openshorts/integrations/elevenlabs.py b/backend/app/integrations/elevenlabs.py similarity index 100% rename from openshorts/integrations/elevenlabs.py rename to backend/app/integrations/elevenlabs.py diff --git a/openshorts/integrations/s3.py b/backend/app/integrations/s3.py similarity index 100% rename from openshorts/integrations/s3.py rename to backend/app/integrations/s3.py diff --git a/openshorts/layouts/CLAUDE.md b/backend/app/layouts/CLAUDE.md similarity index 100% rename from openshorts/layouts/CLAUDE.md rename to backend/app/layouts/CLAUDE.md diff --git a/openshorts/layouts/__init__.py b/backend/app/layouts/__init__.py similarity index 100% rename from openshorts/layouts/__init__.py rename to backend/app/layouts/__init__.py diff --git a/app.py b/backend/app/main.py similarity index 98% rename from app.py rename to backend/app/main.py index 0da72341..d38fc7ec 100644 --- a/app.py +++ b/backend/app/main.py @@ -1,3 +1,4 @@ +"""FastAPI application entrypoint: routes, job queue, and the wire-up of every backend feature.""" import os import uuid import subprocess @@ -15,7 +16,7 @@ from fastapi.staticfiles import StaticFiles from fastapi.responses import HTMLResponse from pydantic import BaseModel -from s3_uploader import upload_job_artifacts, list_all_clips, upload_actor_to_s3, list_actor_gallery, upload_video_to_gallery, list_video_gallery +from app.integrations.s3 import upload_job_artifacts, list_all_clips, upload_actor_to_s3, list_actor_gallery, upload_video_to_gallery, list_video_gallery load_dotenv() @@ -416,11 +417,14 @@ async def get_status(job_id: str): "result": job.get('result') } -from editor import VideoEditor -from subtitles import generate_srt, burn_subtitles, generate_srt_from_video -from hooks import add_hook_to_video -from translate import 
translate_video, get_supported_languages -from thumbnail import analyze_video_for_titles, refine_titles, generate_thumbnail, generate_youtube_description +from app.editing.ai_filters import VideoEditor +from app.overlays.subtitles_generate import generate_srt, generate_srt_from_video +from app.overlays.subtitles_render import burn_subtitles +from app.overlays.hooks import add_hook_to_video +from app.integrations.elevenlabs import translate_video, get_supported_languages +from app.thumbnails.titles import analyze_video_for_titles, refine_titles +from app.thumbnails.images import generate_thumbnail +from app.thumbnails.descriptions import generate_youtube_description class EditRequest(BaseModel): job_id: str @@ -1237,12 +1241,12 @@ async def run_background_whisper(): vpath = video_path # Download YouTube video if URL was provided if not vpath and url: - from main import download_youtube_video + from app.ingest.youtube import download_youtube_video loop = asyncio.get_event_loop() vpath, _ = await loop.run_in_executor(None, download_youtube_video, url, UPLOAD_DIR) thumbnail_sessions[session_id]["video_path"] = vpath - from main import transcribe_video + from app.ml.transcription import transcribe_video loop = asyncio.get_event_loop() transcript = await loop.run_in_executor(None, transcribe_video, vpath) segments = transcript.get("segments", []) @@ -1309,7 +1313,7 @@ async def thumbnail_analyze( session_id = str(uuid.uuid4()) if url: - from main import download_youtube_video + from app.ingest.youtube import download_youtube_video video_path, _ = download_youtube_video(url, UPLOAD_DIR) else: video_path = os.path.join(UPLOAD_DIR, f"thumb_{session_id}_{file.filename}") @@ -1643,7 +1647,7 @@ async def thumbnail_publish_status(publish_id: str): # SaaSShorts: AI UGC Video Generator for SaaS Products # ═══════════════════════════════════════════════════════════════════════ -from saasshorts import ( +from app.saas.pipeline import ( scrape_website, research_saas_online, 
analyze_saas, diff --git a/openshorts/ml/__init__.py b/backend/app/ml/__init__.py similarity index 100% rename from openshorts/ml/__init__.py rename to backend/app/ml/__init__.py diff --git a/openshorts/ml/detection.py b/backend/app/ml/detection.py similarity index 100% rename from openshorts/ml/detection.py rename to backend/app/ml/detection.py diff --git a/openshorts/ml/transcription.py b/backend/app/ml/transcription.py similarity index 100% rename from openshorts/ml/transcription.py rename to backend/app/ml/transcription.py diff --git a/openshorts/ml/viral_extraction.py b/backend/app/ml/viral_extraction.py similarity index 100% rename from openshorts/ml/viral_extraction.py rename to backend/app/ml/viral_extraction.py diff --git a/openshorts/models/__init__.py b/backend/app/models/__init__.py similarity index 100% rename from openshorts/models/__init__.py rename to backend/app/models/__init__.py diff --git a/openshorts/motion_graphics/CLAUDE.md b/backend/app/motion_graphics/CLAUDE.md similarity index 100% rename from openshorts/motion_graphics/CLAUDE.md rename to backend/app/motion_graphics/CLAUDE.md diff --git a/openshorts/motion_graphics/__init__.py b/backend/app/motion_graphics/__init__.py similarity index 100% rename from openshorts/motion_graphics/__init__.py rename to backend/app/motion_graphics/__init__.py diff --git a/openshorts/motion_graphics/library/__init__.py b/backend/app/motion_graphics/library/__init__.py similarity index 100% rename from openshorts/motion_graphics/library/__init__.py rename to backend/app/motion_graphics/library/__init__.py diff --git a/openshorts/overlays/__init__.py b/backend/app/overlays/__init__.py similarity index 100% rename from openshorts/overlays/__init__.py rename to backend/app/overlays/__init__.py diff --git a/openshorts/overlays/hooks.py b/backend/app/overlays/hooks.py similarity index 93% rename from openshorts/overlays/hooks.py rename to backend/app/overlays/hooks.py index e0bb217f..74f785eb 100644 --- 
a/openshorts/overlays/hooks.py +++ b/backend/app/overlays/hooks.py @@ -3,10 +3,28 @@ import textwrap import subprocess import urllib.request +from pathlib import Path from PIL import Image, ImageDraw, ImageFont, ImageFilter FONT_URL = "https://github.com/googlefonts/noto-fonts/raw/main/hinted/ttf/NotoSerif/NotoSerif-Bold.ttf" -FONT_DIR = "fonts" + + +def _resolve_font_dir() -> str: + """Locate the committed font dir whether running from repo root or backend/.""" + env_override = os.environ.get("OPENSHORTS_FONT_DIR") + if env_override: + return env_override + # Walk up from this file (backend/app/overlays/hooks.py) looking for assets/fonts. + here = Path(__file__).resolve() + for parent in here.parents: + candidate = parent / "assets" / "fonts" + if candidate.is_dir(): + return str(candidate) + # Last resort: cwd-relative (used as the download target). + return "assets/fonts" + + +FONT_DIR = _resolve_font_dir() FONT_PATH = os.path.join(FONT_DIR, "NotoSerif-Bold.ttf") def download_font_if_needed(): diff --git a/openshorts/overlays/subtitles_generate.py b/backend/app/overlays/subtitles_generate.py similarity index 100% rename from openshorts/overlays/subtitles_generate.py rename to backend/app/overlays/subtitles_generate.py diff --git a/openshorts/overlays/subtitles_render.py b/backend/app/overlays/subtitles_render.py similarity index 100% rename from openshorts/overlays/subtitles_render.py rename to backend/app/overlays/subtitles_render.py diff --git a/openshorts/prompts/CLAUDE.md b/backend/app/prompts/CLAUDE.md similarity index 100% rename from openshorts/prompts/CLAUDE.md rename to backend/app/prompts/CLAUDE.md diff --git a/openshorts/prompts/__init__.py b/backend/app/prompts/__init__.py similarity index 100% rename from openshorts/prompts/__init__.py rename to backend/app/prompts/__init__.py diff --git a/openshorts/routes/__init__.py b/backend/app/routes/__init__.py similarity index 100% rename from openshorts/routes/__init__.py rename to 
backend/app/routes/__init__.py diff --git a/openshorts/saas/__init__.py b/backend/app/saas/__init__.py similarity index 100% rename from openshorts/saas/__init__.py rename to backend/app/saas/__init__.py diff --git a/openshorts/saas/pipeline.py b/backend/app/saas/pipeline.py similarity index 100% rename from openshorts/saas/pipeline.py rename to backend/app/saas/pipeline.py diff --git a/openshorts/thumbnails/__init__.py b/backend/app/thumbnails/__init__.py similarity index 100% rename from openshorts/thumbnails/__init__.py rename to backend/app/thumbnails/__init__.py diff --git a/openshorts/thumbnails/descriptions.py b/backend/app/thumbnails/descriptions.py similarity index 100% rename from openshorts/thumbnails/descriptions.py rename to backend/app/thumbnails/descriptions.py diff --git a/openshorts/thumbnails/images.py b/backend/app/thumbnails/images.py similarity index 100% rename from openshorts/thumbnails/images.py rename to backend/app/thumbnails/images.py diff --git a/openshorts/thumbnails/titles.py b/backend/app/thumbnails/titles.py similarity index 99% rename from openshorts/thumbnails/titles.py rename to backend/app/thumbnails/titles.py index 125f8f29..6daf3d73 100644 --- a/openshorts/thumbnails/titles.py +++ b/backend/app/thumbnails/titles.py @@ -13,7 +13,7 @@ def analyze_video_for_titles(api_key, video_path, transcript=None): Returns: { "titles": [...], "transcript_summary": "...", "language": "...", "segments": [...], "video_duration": ... 
} """ if transcript is None: - from main import transcribe_video + from app.ml.transcription import transcribe_video print("🎬 [Thumbnail] Transcribing video...") transcript = transcribe_video(video_path) else: diff --git a/openshorts/utils/__init__.py b/backend/app/utils/__init__.py similarity index 100% rename from openshorts/utils/__init__.py rename to backend/app/utils/__init__.py diff --git a/openshorts/utils/filters.py b/backend/app/utils/filters.py similarity index 100% rename from openshorts/utils/filters.py rename to backend/app/utils/filters.py diff --git a/openshorts/video/CLAUDE.md b/backend/app/video/CLAUDE.md similarity index 100% rename from openshorts/video/CLAUDE.md rename to backend/app/video/CLAUDE.md diff --git a/openshorts/video/__init__.py b/backend/app/video/__init__.py similarity index 100% rename from openshorts/video/__init__.py rename to backend/app/video/__init__.py diff --git a/openshorts/video/ffmpeg.py b/backend/app/video/ffmpeg.py similarity index 100% rename from openshorts/video/ffmpeg.py rename to backend/app/video/ffmpeg.py diff --git a/openshorts/video/pipeline.py b/backend/app/video/pipeline.py similarity index 96% rename from openshorts/video/pipeline.py rename to backend/app/video/pipeline.py index aa861a21..cb9bafbf 100644 --- a/openshorts/video/pipeline.py +++ b/backend/app/video/pipeline.py @@ -12,14 +12,14 @@ import cv2 from tqdm import tqdm -from openshorts.video.tracking import SmoothedCameraman, SpeakerTracker, ASPECT_RATIO -from openshorts.video.scene_analysis import ( +from app.video.tracking import SmoothedCameraman, SpeakerTracker, ASPECT_RATIO +from app.video.scene_analysis import ( detect_scenes, get_video_resolution, analyze_scenes_strategy, ) -from openshorts.video.reframing import create_general_frame -from openshorts.ml.detection import detect_face_candidates, detect_person_yolo +from app.video.reframing import create_general_frame +from app.ml.detection import detect_face_candidates, detect_person_yolo def 
process_video_to_vertical(input_video, final_output_video): diff --git a/openshorts/video/reframing.py b/backend/app/video/reframing.py similarity index 100% rename from openshorts/video/reframing.py rename to backend/app/video/reframing.py diff --git a/openshorts/video/scene_analysis.py b/backend/app/video/scene_analysis.py similarity index 97% rename from openshorts/video/scene_analysis.py rename to backend/app/video/scene_analysis.py index 7c49d2ab..efcdd686 100644 --- a/openshorts/video/scene_analysis.py +++ b/backend/app/video/scene_analysis.py @@ -5,7 +5,7 @@ from scenedetect.detectors import ContentDetector from tqdm import tqdm -from openshorts.ml.detection import detect_face_candidates +from app.ml.detection import detect_face_candidates def detect_scenes(video_path): diff --git a/openshorts/video/tracking.py b/backend/app/video/tracking.py similarity index 100% rename from openshorts/video/tracking.py rename to backend/app/video/tracking.py diff --git a/pyproject.toml b/backend/pyproject.toml similarity index 86% rename from pyproject.toml rename to backend/pyproject.toml index 4f740495..5ea13ab9 100644 --- a/pyproject.toml +++ b/backend/pyproject.toml @@ -10,8 +10,8 @@ requires-python = ">=3.9" [tool.setuptools.packages.find] where = ["."] -include = ["openshorts*"] -exclude = ["tests*", "dashboard*", "remotion*", "render-service*"] +include = ["app*"] +exclude = ["tests*"] [tool.pytest.ini_options] testpaths = ["tests"] diff --git a/requirements-dev.txt b/backend/requirements-dev.txt similarity index 100% rename from requirements-dev.txt rename to backend/requirements-dev.txt diff --git a/requirements.txt b/backend/requirements.txt similarity index 100% rename from requirements.txt rename to backend/requirements.txt diff --git a/tests/__init__.py b/backend/tests/__init__.py similarity index 100% rename from tests/__init__.py rename to backend/tests/__init__.py diff --git a/tests/api/__init__.py b/backend/tests/api/__init__.py similarity index 100% 
rename from tests/api/__init__.py rename to backend/tests/api/__init__.py diff --git a/tests/api/test_openapi_contract.py b/backend/tests/api/test_openapi_contract.py similarity index 96% rename from tests/api/test_openapi_contract.py rename to backend/tests/api/test_openapi_contract.py index 8b7c547b..788b3257 100644 --- a/tests/api/test_openapi_contract.py +++ b/backend/tests/api/test_openapi_contract.py @@ -36,9 +36,9 @@ def app_client(tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) from fastapi.testclient import TestClient - import app as app_module # noqa: WPS433 intentional late import + from app.main import app as fastapi_app # noqa: WPS433 intentional late import - with TestClient(app_module.app) as client: + with TestClient(fastapi_app) as client: yield client diff --git a/tests/conftest.py b/backend/tests/conftest.py similarity index 95% rename from tests/conftest.py rename to backend/tests/conftest.py index fe22235b..b04261af 100644 --- a/tests/conftest.py +++ b/backend/tests/conftest.py @@ -18,10 +18,10 @@ from pathlib import Path from unittest.mock import MagicMock -# Make the repo root importable so `import main`, `import editor`, etc work. -REPO_ROOT = Path(__file__).resolve().parent.parent -if str(REPO_ROOT) not in sys.path: - sys.path.insert(0, str(REPO_ROOT)) +# Make `backend/` importable so `import app.*` resolves. 
+BACKEND_ROOT = Path(__file__).resolve().parent.parent +if str(BACKEND_ROOT) not in sys.path: + sys.path.insert(0, str(BACKEND_ROOT)) # --- Heavy module stubbing ----------------------------------------------- diff --git a/tests/e2e/__init__.py b/backend/tests/e2e/__init__.py similarity index 100% rename from tests/e2e/__init__.py rename to backend/tests/e2e/__init__.py diff --git a/tests/e2e/test_pipeline_smoke.py b/backend/tests/e2e/test_pipeline_smoke.py similarity index 100% rename from tests/e2e/test_pipeline_smoke.py rename to backend/tests/e2e/test_pipeline_smoke.py diff --git a/tests/fixtures/README.md b/backend/tests/fixtures/README.md similarity index 100% rename from tests/fixtures/README.md rename to backend/tests/fixtures/README.md diff --git a/tests/snapshots/.gitkeep b/backend/tests/snapshots/.gitkeep similarity index 100% rename from tests/snapshots/.gitkeep rename to backend/tests/snapshots/.gitkeep diff --git a/tests/snapshots/baseline.openapi.json b/backend/tests/snapshots/baseline.openapi.json similarity index 100% rename from tests/snapshots/baseline.openapi.json rename to backend/tests/snapshots/baseline.openapi.json diff --git a/tests/unit/__init__.py b/backend/tests/unit/__init__.py similarity index 100% rename from tests/unit/__init__.py rename to backend/tests/unit/__init__.py diff --git a/tests/unit/test_filter_sanitization.py b/backend/tests/unit/test_filter_sanitization.py similarity index 98% rename from tests/unit/test_filter_sanitization.py rename to backend/tests/unit/test_filter_sanitization.py index 60e9e4ae..e7a4b95d 100644 --- a/tests/unit/test_filter_sanitization.py +++ b/backend/tests/unit/test_filter_sanitization.py @@ -8,7 +8,7 @@ """ import pytest -from editor import VideoEditor +from app.editing.ai_filters import VideoEditor # --- _sanitize_filter_string -------------------------------------------- diff --git a/tests/unit/test_hook_image.py b/backend/tests/unit/test_hook_image.py similarity index 98% rename from 
tests/unit/test_hook_image.py rename to backend/tests/unit/test_hook_image.py index ca359cf6..bb2f5c4b 100644 --- a/tests/unit/test_hook_image.py +++ b/backend/tests/unit/test_hook_image.py @@ -12,7 +12,7 @@ import pytest from PIL import Image -from hooks import create_hook_image +from app.overlays.hooks import create_hook_image def test_create_hook_image_writes_a_png(tmp_path): diff --git a/tests/unit/test_srt_generation.py b/backend/tests/unit/test_srt_generation.py similarity index 96% rename from tests/unit/test_srt_generation.py rename to backend/tests/unit/test_srt_generation.py index 34242568..1ef542b8 100644 --- a/tests/unit/test_srt_generation.py +++ b/backend/tests/unit/test_srt_generation.py @@ -7,7 +7,8 @@ import re import pytest -from subtitles import format_srt_block, generate_srt, hex_to_ass_color +from app.overlays.subtitles_generate import format_srt_block, generate_srt +from app.overlays.subtitles_render import hex_to_ass_color # --- format_srt_block --------------------------------------------------- diff --git a/tests/unit/test_tracking.py b/backend/tests/unit/test_tracking.py similarity index 98% rename from tests/unit/test_tracking.py rename to backend/tests/unit/test_tracking.py index e706c486..9f520374 100644 --- a/tests/unit/test_tracking.py +++ b/backend/tests/unit/test_tracking.py @@ -13,7 +13,7 @@ """ import pytest -from main import SmoothedCameraman, SpeakerTracker +from app.video.tracking import SmoothedCameraman, SpeakerTracker # --- SmoothedCameraman --------------------------------------------------- diff --git a/tests/unit/test_translate_languages.py b/backend/tests/unit/test_translate_languages.py similarity index 93% rename from tests/unit/test_translate_languages.py rename to backend/tests/unit/test_translate_languages.py index a7ac107e..0f4312a7 100644 --- a/tests/unit/test_translate_languages.py +++ b/backend/tests/unit/test_translate_languages.py @@ -5,7 +5,7 @@ Locks in the public surface so the restructure can't 
accidentally drop or rename a language code. """ -from translate import SUPPORTED_LANGUAGES, get_supported_languages +from app.integrations.elevenlabs import SUPPORTED_LANGUAGES, get_supported_languages # A minimal canonical set we want the API to keep advertising. diff --git a/docker-compose.yml b/docker-compose.yml index fa544bea..057fe07b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,22 +1,24 @@ services: backend: - build: . + build: ./backend container_name: openshorts-backend ports: - "8000:8000" volumes: - - .:/app + - ./backend:/app + - ./assets:/app/assets:ro - /app/__pycache__ - ./output:/app/output + - ./uploads:/app/uploads restart: unless-stopped frontend: - build: ./dashboard + build: ./frontend container_name: openshorts-frontend ports: - "5175:5173" volumes: - - ./dashboard:/app + - ./frontend:/app - /app/node_modules restart: unless-stopped depends_on: @@ -25,7 +27,7 @@ services: renderer: build: context: . - dockerfile: render-service/Dockerfile + dockerfile: renderer/service/Dockerfile container_name: openshorts-renderer ports: - "3100:3100" diff --git a/editor.py b/editor.py deleted file mode 100644 index 1c463222..00000000 --- a/editor.py +++ /dev/null @@ -1,11 +0,0 @@ -"""Compat shim: re-exports openshorts.editing.ai_filters.VideoEditor at the original path. - -This module was split into three files as part of the restructure: -- openshorts/editing/ai_filters.py (VideoEditor class) -- openshorts/editing/prompts.py (Gemini prompt templates) -- openshorts/utils/filters.py (shared FFmpeg filter helpers) - -New code should import from those modules directly. This shim keeps existing -`from editor import VideoEditor` calls working. 
-""" -from openshorts.editing.ai_filters import VideoEditor # noqa: F401 diff --git a/dashboard/.gitignore b/frontend/.gitignore similarity index 100% rename from dashboard/.gitignore rename to frontend/.gitignore diff --git a/dashboard/Dockerfile b/frontend/Dockerfile similarity index 100% rename from dashboard/Dockerfile rename to frontend/Dockerfile diff --git a/dashboard/README.md b/frontend/README.md similarity index 100% rename from dashboard/README.md rename to frontend/README.md diff --git a/dashboard/eslint.config.js b/frontend/eslint.config.js similarity index 100% rename from dashboard/eslint.config.js rename to frontend/eslint.config.js diff --git a/dashboard/index.html b/frontend/index.html similarity index 100% rename from dashboard/index.html rename to frontend/index.html diff --git a/dashboard/package-lock.json b/frontend/package-lock.json similarity index 100% rename from dashboard/package-lock.json rename to frontend/package-lock.json diff --git a/dashboard/package.json b/frontend/package.json similarity index 100% rename from dashboard/package.json rename to frontend/package.json diff --git a/dashboard/postcss.config.js b/frontend/postcss.config.js similarity index 100% rename from dashboard/postcss.config.js rename to frontend/postcss.config.js diff --git a/dashboard/public/logo-openshorts.png b/frontend/public/logo-openshorts.png similarity index 100% rename from dashboard/public/logo-openshorts.png rename to frontend/public/logo-openshorts.png diff --git a/dashboard/public/og-image.png b/frontend/public/og-image.png similarity index 100% rename from dashboard/public/og-image.png rename to frontend/public/og-image.png diff --git a/dashboard/public/robots.txt b/frontend/public/robots.txt similarity index 100% rename from dashboard/public/robots.txt rename to frontend/public/robots.txt diff --git a/dashboard/public/sitemap.xml b/frontend/public/sitemap.xml similarity index 100% rename from dashboard/public/sitemap.xml rename to 
frontend/public/sitemap.xml diff --git a/dashboard/public/vite.svg b/frontend/public/vite.svg similarity index 100% rename from dashboard/public/vite.svg rename to frontend/public/vite.svg diff --git a/dashboard/src/App.css b/frontend/src/App.css similarity index 100% rename from dashboard/src/App.css rename to frontend/src/App.css diff --git a/dashboard/src/App.jsx b/frontend/src/App.jsx similarity index 100% rename from dashboard/src/App.jsx rename to frontend/src/App.jsx diff --git a/dashboard/src/Landing.jsx b/frontend/src/Landing.jsx similarity index 100% rename from dashboard/src/Landing.jsx rename to frontend/src/Landing.jsx diff --git a/dashboard/src/Legal.jsx b/frontend/src/Legal.jsx similarity index 100% rename from dashboard/src/Legal.jsx rename to frontend/src/Legal.jsx diff --git a/dashboard/src/assets/react.svg b/frontend/src/assets/react.svg similarity index 100% rename from dashboard/src/assets/react.svg rename to frontend/src/assets/react.svg diff --git a/dashboard/src/components/Gallery.jsx b/frontend/src/components/Gallery.jsx similarity index 100% rename from dashboard/src/components/Gallery.jsx rename to frontend/src/components/Gallery.jsx diff --git a/dashboard/src/components/GalleryCard.jsx b/frontend/src/components/GalleryCard.jsx similarity index 100% rename from dashboard/src/components/GalleryCard.jsx rename to frontend/src/components/GalleryCard.jsx diff --git a/dashboard/src/components/HookModal.jsx b/frontend/src/components/HookModal.jsx similarity index 100% rename from dashboard/src/components/HookModal.jsx rename to frontend/src/components/HookModal.jsx diff --git a/dashboard/src/components/KeyInput.jsx b/frontend/src/components/KeyInput.jsx similarity index 100% rename from dashboard/src/components/KeyInput.jsx rename to frontend/src/components/KeyInput.jsx diff --git a/dashboard/src/components/MediaInput.jsx b/frontend/src/components/MediaInput.jsx similarity index 100% rename from dashboard/src/components/MediaInput.jsx rename to 
frontend/src/components/MediaInput.jsx diff --git a/dashboard/src/components/ProcessingAnimation.jsx b/frontend/src/components/ProcessingAnimation.jsx similarity index 100% rename from dashboard/src/components/ProcessingAnimation.jsx rename to frontend/src/components/ProcessingAnimation.jsx diff --git a/dashboard/src/components/RemotionPreview.jsx b/frontend/src/components/RemotionPreview.jsx similarity index 100% rename from dashboard/src/components/RemotionPreview.jsx rename to frontend/src/components/RemotionPreview.jsx diff --git a/dashboard/src/components/ResultCard.jsx b/frontend/src/components/ResultCard.jsx similarity index 100% rename from dashboard/src/components/ResultCard.jsx rename to frontend/src/components/ResultCard.jsx diff --git a/dashboard/src/components/SaaShortsTab.jsx b/frontend/src/components/SaaShortsTab.jsx similarity index 100% rename from dashboard/src/components/SaaShortsTab.jsx rename to frontend/src/components/SaaShortsTab.jsx diff --git a/dashboard/src/components/ScheduleWeekModal.jsx b/frontend/src/components/ScheduleWeekModal.jsx similarity index 100% rename from dashboard/src/components/ScheduleWeekModal.jsx rename to frontend/src/components/ScheduleWeekModal.jsx diff --git a/dashboard/src/components/SubtitleModal.jsx b/frontend/src/components/SubtitleModal.jsx similarity index 100% rename from dashboard/src/components/SubtitleModal.jsx rename to frontend/src/components/SubtitleModal.jsx diff --git a/dashboard/src/components/ThumbnailStudio.jsx b/frontend/src/components/ThumbnailStudio.jsx similarity index 100% rename from dashboard/src/components/ThumbnailStudio.jsx rename to frontend/src/components/ThumbnailStudio.jsx diff --git a/dashboard/src/components/TranslateModal.jsx b/frontend/src/components/TranslateModal.jsx similarity index 100% rename from dashboard/src/components/TranslateModal.jsx rename to frontend/src/components/TranslateModal.jsx diff --git a/dashboard/src/components/UGCGallery.jsx 
b/frontend/src/components/UGCGallery.jsx similarity index 100% rename from dashboard/src/components/UGCGallery.jsx rename to frontend/src/components/UGCGallery.jsx diff --git a/dashboard/src/config.js b/frontend/src/config.js similarity index 100% rename from dashboard/src/config.js rename to frontend/src/config.js diff --git a/dashboard/src/index.css b/frontend/src/index.css similarity index 100% rename from dashboard/src/index.css rename to frontend/src/index.css diff --git a/dashboard/src/lib/renderInBrowser.js b/frontend/src/lib/renderInBrowser.js similarity index 100% rename from dashboard/src/lib/renderInBrowser.js rename to frontend/src/lib/renderInBrowser.js diff --git a/dashboard/src/main.jsx b/frontend/src/main.jsx similarity index 100% rename from dashboard/src/main.jsx rename to frontend/src/main.jsx diff --git a/dashboard/src/remotion/compositions/HookOverlay.tsx b/frontend/src/remotion/compositions/HookOverlay.tsx similarity index 100% rename from dashboard/src/remotion/compositions/HookOverlay.tsx rename to frontend/src/remotion/compositions/HookOverlay.tsx diff --git a/dashboard/src/remotion/compositions/ShortVideo.tsx b/frontend/src/remotion/compositions/ShortVideo.tsx similarity index 100% rename from dashboard/src/remotion/compositions/ShortVideo.tsx rename to frontend/src/remotion/compositions/ShortVideo.tsx diff --git a/dashboard/src/remotion/compositions/Subtitles.tsx b/frontend/src/remotion/compositions/Subtitles.tsx similarity index 100% rename from dashboard/src/remotion/compositions/Subtitles.tsx rename to frontend/src/remotion/compositions/Subtitles.tsx diff --git a/dashboard/src/remotion/compositions/VideoEffects.tsx b/frontend/src/remotion/compositions/VideoEffects.tsx similarity index 100% rename from dashboard/src/remotion/compositions/VideoEffects.tsx rename to frontend/src/remotion/compositions/VideoEffects.tsx diff --git a/dashboard/src/remotion/lib/captions.ts b/frontend/src/remotion/lib/captions.ts similarity index 100% rename 
from dashboard/src/remotion/lib/captions.ts rename to frontend/src/remotion/lib/captions.ts diff --git a/dashboard/src/remotion/lib/fonts.ts b/frontend/src/remotion/lib/fonts.ts similarity index 100% rename from dashboard/src/remotion/lib/fonts.ts rename to frontend/src/remotion/lib/fonts.ts diff --git a/dashboard/src/remotion/lib/types.ts b/frontend/src/remotion/lib/types.ts similarity index 100% rename from dashboard/src/remotion/lib/types.ts rename to frontend/src/remotion/lib/types.ts diff --git a/dashboard/tailwind.config.js b/frontend/tailwind.config.js similarity index 100% rename from dashboard/tailwind.config.js rename to frontend/tailwind.config.js diff --git a/dashboard/vite.config.js b/frontend/vite.config.js similarity index 100% rename from dashboard/vite.config.js rename to frontend/vite.config.js diff --git a/hooks.py b/hooks.py deleted file mode 100644 index b096f4fd..00000000 --- a/hooks.py +++ /dev/null @@ -1,15 +0,0 @@ -"""Compat shim: re-exports openshorts.overlays.hooks at the original import path. - -This module moved to openshorts/overlays/hooks.py as part of the restructure. -New code should import from `openshorts.overlays.hooks` directly; this shim -keeps existing `from hooks import ...` calls working. -""" -from openshorts.overlays.hooks import * # noqa: F401,F403 -from openshorts.overlays.hooks import ( # noqa: F401 - FONT_URL, - FONT_DIR, - FONT_PATH, - download_font_if_needed, - create_hook_image, - add_hook_to_video, -) diff --git a/openshorts/app.py b/openshorts/app.py deleted file mode 100644 index a8a0ece6..00000000 --- a/openshorts/app.py +++ /dev/null @@ -1,23 +0,0 @@ -"""FastAPI application entrypoint for the openshorts package. - -This module exposes the FastAPI ``app`` instance used by uvicorn: - - uvicorn openshorts.app:app --host 0.0.0.0 --port 8000 - -The actual route handlers still live in the root-level ``app.py`` during the -restructure. 
A future commit will split that monolith into the planned router -modules under openshorts/routes/ (process, editing, subtitles, hooks, -translation, thumbnails, saasshorts, audio, layouts, motion_graphics, social). -Until then this module simply re-exports the existing FastAPI instance so the -Dockerfile / docker-compose entrypoint can target the package path. -""" -import os -import sys - -# Make sure the repo root is on sys.path so `import app` resolves to the -# original root-level app.py rather than this package's own openshorts/app.py. -_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)) -if _REPO_ROOT not in sys.path: - sys.path.insert(0, _REPO_ROOT) - -from app import app # noqa: E402,F401 diff --git a/remotion/.gitignore b/renderer/compositions/.gitignore similarity index 100% rename from remotion/.gitignore rename to renderer/compositions/.gitignore diff --git a/remotion/package.json b/renderer/compositions/package.json similarity index 100% rename from remotion/package.json rename to renderer/compositions/package.json diff --git a/remotion/public/fonts/NotoSerif-Bold.ttf b/renderer/compositions/public/fonts/NotoSerif-Bold.ttf similarity index 100% rename from remotion/public/fonts/NotoSerif-Bold.ttf rename to renderer/compositions/public/fonts/NotoSerif-Bold.ttf diff --git a/remotion/src/Root.tsx b/renderer/compositions/src/Root.tsx similarity index 100% rename from remotion/src/Root.tsx rename to renderer/compositions/src/Root.tsx diff --git a/remotion/src/compositions/HookOverlay.tsx b/renderer/compositions/src/compositions/HookOverlay.tsx similarity index 100% rename from remotion/src/compositions/HookOverlay.tsx rename to renderer/compositions/src/compositions/HookOverlay.tsx diff --git a/remotion/src/compositions/ShortVideo.tsx b/renderer/compositions/src/compositions/ShortVideo.tsx similarity index 100% rename from remotion/src/compositions/ShortVideo.tsx rename to renderer/compositions/src/compositions/ShortVideo.tsx 
diff --git a/remotion/src/compositions/Subtitles.tsx b/renderer/compositions/src/compositions/Subtitles.tsx similarity index 100% rename from remotion/src/compositions/Subtitles.tsx rename to renderer/compositions/src/compositions/Subtitles.tsx diff --git a/remotion/src/compositions/VideoEffects.tsx b/renderer/compositions/src/compositions/VideoEffects.tsx similarity index 100% rename from remotion/src/compositions/VideoEffects.tsx rename to renderer/compositions/src/compositions/VideoEffects.tsx diff --git a/remotion/src/index.ts b/renderer/compositions/src/index.ts similarity index 100% rename from remotion/src/index.ts rename to renderer/compositions/src/index.ts diff --git a/remotion/src/lib/captions.ts b/renderer/compositions/src/lib/captions.ts similarity index 100% rename from remotion/src/lib/captions.ts rename to renderer/compositions/src/lib/captions.ts diff --git a/remotion/src/lib/fonts.ts b/renderer/compositions/src/lib/fonts.ts similarity index 100% rename from remotion/src/lib/fonts.ts rename to renderer/compositions/src/lib/fonts.ts diff --git a/remotion/src/lib/types.ts b/renderer/compositions/src/lib/types.ts similarity index 100% rename from remotion/src/lib/types.ts rename to renderer/compositions/src/lib/types.ts diff --git a/remotion/tsconfig.json b/renderer/compositions/tsconfig.json similarity index 100% rename from remotion/tsconfig.json rename to renderer/compositions/tsconfig.json diff --git a/render-service/.gitignore b/renderer/service/.gitignore similarity index 100% rename from render-service/.gitignore rename to renderer/service/.gitignore diff --git a/render-service/Dockerfile b/renderer/service/Dockerfile similarity index 61% rename from render-service/Dockerfile rename to renderer/service/Dockerfile index f9a03f62..0e3b25b0 100644 --- a/render-service/Dockerfile +++ b/renderer/service/Dockerfile @@ -12,17 +12,17 @@ ENV OUTPUT_DIR=/output WORKDIR /app # Copy render service source -COPY render-service/package.json ./ +COPY 
renderer/service/package.json ./ RUN npm install -COPY render-service/tsconfig.json ./ -COPY render-service/src/ ./src/ +COPY renderer/service/tsconfig.json ./ +COPY renderer/service/src/ ./src/ # Copy remotion source for bundling -COPY remotion/package.json /app/remotion/package.json -COPY remotion/tsconfig.json /app/remotion/tsconfig.json -COPY remotion/src/ /app/remotion/src/ -COPY remotion/public/ /app/remotion/public/ +COPY renderer/compositions/package.json /app/remotion/package.json +COPY renderer/compositions/tsconfig.json /app/remotion/tsconfig.json +COPY renderer/compositions/src/ /app/remotion/src/ +COPY renderer/compositions/public/ /app/remotion/public/ # Install remotion deps RUN cd /app/remotion && npm install diff --git a/render-service/package-lock.json b/renderer/service/package-lock.json similarity index 100% rename from render-service/package-lock.json rename to renderer/service/package-lock.json diff --git a/render-service/package.json b/renderer/service/package.json similarity index 100% rename from render-service/package.json rename to renderer/service/package.json diff --git a/render-service/src/bundle.ts b/renderer/service/src/bundle.ts similarity index 100% rename from render-service/src/bundle.ts rename to renderer/service/src/bundle.ts diff --git a/render-service/src/render-worker.ts b/renderer/service/src/render-worker.ts similarity index 100% rename from render-service/src/render-worker.ts rename to renderer/service/src/render-worker.ts diff --git a/render-service/src/server.ts b/renderer/service/src/server.ts similarity index 100% rename from render-service/src/server.ts rename to renderer/service/src/server.ts diff --git a/render-service/tsconfig.json b/renderer/service/tsconfig.json similarity index 100% rename from render-service/tsconfig.json rename to renderer/service/tsconfig.json diff --git a/s3_uploader.py b/s3_uploader.py deleted file mode 100644 index 59b46449..00000000 --- a/s3_uploader.py +++ /dev/null @@ -1,19 +0,0 @@ 
-"""Compat shim: re-exports openshorts.integrations.s3 at the original import path. - -This module moved to openshorts/integrations/s3.py as part of the restructure. -New code should import from `openshorts.integrations.s3` directly; this shim -keeps existing imports (e.g. `from s3_uploader import upload_job_artifacts`) -working while the restructure is in flight. -""" -from openshorts.integrations.s3 import * # noqa: F401,F403 -from openshorts.integrations.s3 import ( # noqa: F401 - upload_file_to_s3, - get_s3_client, - generate_presigned_url, - list_all_clips, - upload_actor_to_s3, - list_actor_gallery, - upload_video_to_gallery, - list_video_gallery, - upload_job_artifacts, -) diff --git a/saasshorts.py b/saasshorts.py deleted file mode 100644 index 626ad169..00000000 --- a/saasshorts.py +++ /dev/null @@ -1,22 +0,0 @@ -"""Compat shim: re-exports openshorts.saas.pipeline at the original import path. - -The SaaS UGC pipeline moved to openshorts/saas/pipeline.py as part of the -restructure. A future commit may split it further into research / scripting / -media / compositing / pipeline modules per the plan; for now it lives as a -single module in the saas/ folder. New code should import from -`openshorts.saas.pipeline` directly; this shim keeps existing -`from saasshorts import ...` calls working. -""" -from openshorts.saas.pipeline import * # noqa: F401,F403 -from openshorts.saas.pipeline import ( # noqa: F401 - scrape_website, - research_saas_online, - analyze_saas, - generate_scripts, - generate_full_video, - generate_actor_images, - generate_actor_image, - generate_voiceover, - get_elevenlabs_voices, - DEFAULT_VOICES, -) diff --git a/scripts/update_claude_md.py b/scripts/update_claude_md.py index 62e7d3f0..3d0b744a 100755 --- a/scripts/update_claude_md.py +++ b/scripts/update_claude_md.py @@ -15,7 +15,7 @@ The script: 1. Walks the repo and lists top-level folders (REPO-MAP). -2. Parses every openshorts/*.py module via ast, extracting the one-line +2. 
Parses every backend/app/*.py module via ast, extracting the one-line docstring + the names of public functions/classes (MODULE-MAP). 3. Reads .env.example and renders the env-vars table (ENV). 4. Locates the markers in CLAUDE.md and rewrites only the content between them. @@ -26,7 +26,7 @@ ====================== This script exits non-zero (with a list of offenders) if any module under -openshorts/ is missing a module docstring. The pre-commit hook will fail the +backend/app/ is missing a module docstring. The pre-commit hook will fail the commit until the developer adds one. This is how the "every module has a one-liner" convention from CLAUDE.md becomes mechanically enforced — without this, CLAUDE.md is just an advisory and drifts. @@ -49,7 +49,7 @@ from typing import List, Tuple REPO_ROOT = Path(__file__).resolve().parent.parent -PACKAGE_ROOT = REPO_ROOT / "openshorts" +PACKAGE_ROOT = REPO_ROOT / "backend" / "app" CLAUDE_MD = REPO_ROOT / "CLAUDE.md" ENV_EXAMPLE = REPO_ROOT / ".env.example" @@ -83,16 +83,13 @@ def replace_between(text: str, start: str, end: str, body: str) -> str: # --------------------------------------------------------------------------- TOP_LEVEL_DESCRIPTIONS = { - "openshorts": "Python package — all backend code lives here.", - "dashboard": "React + Vite frontend (out of scope for the current restructure).", - "remotion": "Remotion compositions (TypeScript) consumed by the render-service.", - "render-service": "Standalone TypeScript microservice that bundles + renders Remotion compositions.", - "fonts": "Committed TTFs (Noto Serif Bold) used by hook overlays.", + "backend": "Python FastAPI service — the API, video pipeline, and tests.", + "frontend": "React + Vite dashboard — the UI users interact with.", + "renderer": "Remotion render microservice (TypeScript) + compositions.", + "assets": "Committed static assets (fonts, screenshots).", "scripts": "Developer tooling (update_claude_md.py, install_hooks.sh).", - "tests": "Pytest suite — 
unit, API contract, and e2e smoke.", "uploads": "Runtime: incoming video uploads (gitignored).", "output": "Runtime: generated clips and thumbnails (gitignored).", - "screenshots": "Repo screenshots used in README.md.", } @@ -225,7 +222,7 @@ def main() -> int: env_table = build_env_table() if errors: - print("❌ Module docstrings missing — every .py file under openshorts/ " + print("❌ Module docstrings missing — every .py file under backend/app/ " "must start with a one-line module docstring:", file=sys.stderr) for err in errors: print(f" - {err}", file=sys.stderr) diff --git a/subtitles.py b/subtitles.py deleted file mode 100644 index 03c8f6fc..00000000 --- a/subtitles.py +++ /dev/null @@ -1,19 +0,0 @@ -"""Compat shim: re-exports openshorts.overlays.subtitles_* at the original path. - -This module was split into two files as part of the restructure: -- openshorts/overlays/subtitles_generate.py (transcribe + SRT writing) -- openshorts/overlays/subtitles_render.py (FFmpeg subtitles burn-in) - -New code should import from those modules directly. This shim keeps existing -`from subtitles import ...` calls working. -""" -from openshorts.overlays.subtitles_generate import ( # noqa: F401 - transcribe_audio, - generate_srt_from_video, - generate_srt, - format_srt_block, -) -from openshorts.overlays.subtitles_render import ( # noqa: F401 - hex_to_ass_color, - burn_subtitles, -) diff --git a/thumbnail.py b/thumbnail.py deleted file mode 100644 index 4761e1ff..00000000 --- a/thumbnail.py +++ /dev/null @@ -1,16 +0,0 @@ -"""Compat shim: re-exports openshorts.thumbnails.* at the original import path. - -This module was split into three files as part of the restructure: -- openshorts/thumbnails/titles.py (analyze_video_for_titles, refine_titles) -- openshorts/thumbnails/images.py (generate_thumbnail) -- openshorts/thumbnails/descriptions.py (generate_youtube_description) - -New code should import from those modules directly. 
This shim keeps existing -`from thumbnail import ...` calls working. -""" -from openshorts.thumbnails.titles import ( # noqa: F401 - analyze_video_for_titles, - refine_titles, -) -from openshorts.thumbnails.images import generate_thumbnail # noqa: F401 -from openshorts.thumbnails.descriptions import generate_youtube_description # noqa: F401 diff --git a/translate.py b/translate.py deleted file mode 100644 index 1bb037b9..00000000 --- a/translate.py +++ /dev/null @@ -1,16 +0,0 @@ -"""Compat shim: re-exports openshorts.integrations.elevenlabs at the original path. - -This module moved to openshorts/integrations/elevenlabs.py as part of the -restructure. New code should import from `openshorts.integrations.elevenlabs` -directly; this shim keeps existing `from translate import ...` calls working. -""" -from openshorts.integrations.elevenlabs import * # noqa: F401,F403 -from openshorts.integrations.elevenlabs import ( # noqa: F401 - SUPPORTED_LANGUAGES, - ELEVENLABS_API_BASE, - create_dubbing_project, - get_dubbing_status, - download_dubbed_video, - translate_video, - get_supported_languages, -) diff --git a/verify_aesthetic.py b/verify_aesthetic.py deleted file mode 100644 index b288f86e..00000000 --- a/verify_aesthetic.py +++ /dev/null @@ -1,37 +0,0 @@ -import os -import shutil -# Check if PIL is installed, if not we can't run this locally but it will run in docker -try: - from hooks import create_hook_image -except ImportError: - print("⚠️ PIL not found locally. Please run this inside the Docker container.") - # Mocking for local check if needed or just exit - exit(1) - -def verify(): - print("🧪 Verifying Hook Aesthetics...") - - test_text = "POV: You are testing\nthe new aesthetic feature\nwith explicit lines." 
- output_path = "aesthetic_hook.png" - target_width = 800 - - try: - path, w, h = create_hook_image(test_text, target_width, output_image_path=output_path) - - print(f"✅ Image generated at {path}") - print(f" Dimensions including shadow: {w}x{h}") - - # Verify it's larger than the text box would be (due to shadow/padding) - # Just rudimentary checks - if not os.path.exists(path): - print("❌ File does not exist") - return False - - print("✨ Verification Successful! (Inspect aesthetic_hook.png visually)") - return True - except Exception as e: - print(f"❌ Verification Failed: {e}") - return False - -if __name__ == "__main__": - verify() diff --git a/verify_custom_hook.py b/verify_custom_hook.py deleted file mode 100644 index e645927e..00000000 --- a/verify_custom_hook.py +++ /dev/null @@ -1,32 +0,0 @@ -import os -try: - from hooks import create_hook_image -except ImportError: - print("⚠️ PIL not found locally. Run inside Docker.") - exit(1) - -def verify(): - print("🧪 Verifying Hook Customization...") - test_text = "Custom Position\n& Size Test" - - # Test 1: Small + Top - print(" Testing Small + Top...") - p1, w1, h1 = create_hook_image(test_text, 800, "hook_small.png", font_scale=0.8) - print(f" ✅ Small: {w1}x{h1}") - - # Test 2: Large + Center - print(" Testing Large...") - p2, w2, h2 = create_hook_image(test_text, 800, "hook_large.png", font_scale=1.3) - print(f" ✅ Large: {w2}x{h2}") - - if w2 > w1 and h2 > h1: - print(" ✅ Scaling logic works (Large > Small)") - else: - print(" ❌ Scaling logic failed") - - # Cleanup - if os.path.exists(p1): os.remove(p1) - if os.path.exists(p2): os.remove(p2) - -if __name__ == "__main__": - verify() diff --git a/verify_hooks.py b/verify_hooks.py deleted file mode 100644 index 9c7a51c7..00000000 --- a/verify_hooks.py +++ /dev/null @@ -1,36 +0,0 @@ -import os -import shutil -from hooks import create_hook_image - -def verify(): - print("🧪 Verifying Hook Image Generation...") - - test_text = "POV: You are testing the viral hook 
feature\nand it works perfectly." - output_path = "test_hook.png" - target_width = 800 - - try: - path, w, h = create_hook_image(test_text, target_width, output_image_path=output_path) - - print(f"✅ Image generated at {path}") - print(f" Dimensions: {w}x{h}") - - if not os.path.exists(path): - print("❌ File does not exist") - return False - - if os.path.getsize(path) == 0: - print("❌ File is empty") - return False - - print("✨ Verification Successful!") - return True - except Exception as e: - print(f"❌ Verification Failed: {e}") - return False - finally: - if os.path.exists(output_path): - os.remove(output_path) - -if __name__ == "__main__": - verify() From 3d2b4f844d3692328f018892b087c5d13a6d1002 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Tue, 19 May 2026 21:22:37 -0400 Subject: [PATCH 20/43] feat(brand-kit): brand kit settings + font upload + port refresh Brand Kit feature: 9-anchor position grid, per-ratio (9:16/16:9) sizing, font upload (system/bundled/user), and live chunk-cycling preview. Subtitle + Hook modals pre-fill from the live brand kit via useBrandKit so changes propagate without a page reload. Backend: new font endpoints (/api/fonts, /api/fonts/upload, /api/fonts/file/*), words_per_line threaded through SubtitleRequest -> generate_srt(max_words). OpenAPI snapshot regenerated for the new font routes. Also bundles the previous session's port refresh (3001/3002/3003 host mappings) and the HANDOFF.md briefing doc. 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- .env.example | 6 +- CLAUDE.md | 10 +- HANDOFF.md | 689 ++++++++++++++++++ README.md | 2 +- assets/fonts/user/.gitkeep | 0 backend/app/main.py | 129 +++- backend/app/overlays/subtitles_generate.py | 28 +- backend/tests/snapshots/baseline.openapi.json | 166 +++++ docker-compose.yml | 8 +- frontend/src/components/BrandKit.jsx | 218 ++++++ frontend/src/components/BrandPreview.jsx | 211 ++++++ frontend/src/components/FontPicker.jsx | 142 ++++ frontend/src/components/HookModal.jsx | 19 +- frontend/src/components/PositionGrid.jsx | 55 ++ frontend/src/components/ResultCard.jsx | 2 + frontend/src/components/SubtitleModal.jsx | 41 +- frontend/src/lib/brandKit.js | 191 +++++ 17 files changed, 1890 insertions(+), 27 deletions(-) create mode 100644 HANDOFF.md create mode 100644 assets/fonts/user/.gitkeep create mode 100644 frontend/src/components/BrandKit.jsx create mode 100644 frontend/src/components/BrandPreview.jsx create mode 100644 frontend/src/components/FontPicker.jsx create mode 100644 frontend/src/components/PositionGrid.jsx create mode 100644 frontend/src/lib/brandKit.js diff --git a/.env.example b/.env.example index 09e6e132..9a0ebedc 100644 --- a/.env.example +++ b/.env.example @@ -32,7 +32,9 @@ DISABLE_YOUTUBE_URL=false # --- Optional: Remotion render service -------------------------------------- -# URL of the render-service container (only used by /api/render). +# Used by the backend to call the renderer. +# Default uses Docker's internal network (service name + container port). +# If you run the backend OUTSIDE Docker, change to: http://localhost:3003 RENDER_SERVICE_URL=http://renderer:3100 # --- Tuning ----------------------------------------------------------------- @@ -45,7 +47,7 @@ MAX_CONCURRENT_JOBS=5 # ============================================================================= # Production API URL override (defaults to relative paths in dev). 
-VITE_API_URL=http://localhost:8000 +VITE_API_URL=http://localhost:3002 # Optional salt for localStorage API-key encryption. # VITE_ENCRYPTION_KEY= diff --git a/CLAUDE.md b/CLAUDE.md index a4c695f3..78566c83 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -30,16 +30,16 @@ Each top-level folder is self-contained: `backend/` has its own `Dockerfile` and ```bash # Full stack (recommended) docker compose up --build -# Frontend → http://localhost:5175 -# Backend → http://localhost:8000 -# Renderer → http://localhost:3100 +# Frontend → http://localhost:3001 +# Backend → http://localhost:3002 +# Renderer → http://localhost:3003 # Backend only (local dev — needs Python 3.11+ and FFmpeg on PATH) cd backend pip install -r requirements.txt -r requirements-dev.txt pip install -e . pytest -m "not e2e" # unit + API contract suite (~0.6s) -uvicorn app.main:app --host 0.0.0.0 --port 8000 +uvicorn app.main:app --host 0.0.0.0 --port 3002 # Frontend only cd frontend && npm install && npm run dev @@ -201,7 +201,7 @@ Server-side env vars the code actually reads. **Auto-managed** — generated fro | `YOUTUBE_COOKIES` | _(unset)_ | Optional: YouTube ingestion (commented — optional) | | `RENDER_SERVICE_URL` | `http://renderer:3100` | Optional: Remotion render service | | `MAX_CONCURRENT_JOBS` | `5` | Tuning | -| `VITE_API_URL` | `http://localhost:8000` | Tuning | +| `VITE_API_URL` | `http://localhost:3002` | Tuning | | `VITE_ENCRYPTION_KEY` | _(unset)_ | Tuning (commented — optional) | | `ELEVENLABS_API_KEY` | _(unset)_ | Tuning (commented — optional) | | `UPLOAD_POST_API_KEY` | _(unset)_ | Tuning (commented — optional) | diff --git a/HANDOFF.md b/HANDOFF.md new file mode 100644 index 00000000..4ca2a199 --- /dev/null +++ b/HANDOFF.md @@ -0,0 +1,689 @@ +# OpenShorts — Full Project Handoff + +A self-contained briefing for the next agent (human or LLM) picking up this codebase. Reads top-to-bottom. + +--- + +## 1. What OpenShorts is + +OpenShorts is an **AI-powered vertical short-video generator**. 
Drop in a long video (YouTube URL or local upload) and it produces 3–15 viral 9:16 clips ready for TikTok / Reels / Shorts. + +The hot path: +1. Transcribe audio locally (faster-whisper, INT8). +2. Detect scene boundaries (PySceneDetect). +3. Send transcript to **Gemini 2.5 Flash** → returns 3–15 viral moments with start/end times and titles. +4. Cut each clip with FFmpeg. +5. Per-scene reframe to 9:16 — either tracking the active speaker (MediaPipe face + YOLOv8 person) or a panoramic blurred-background ("General") composite. +6. Optional layers: AI-generated FFmpeg effects, text hook PNGs, burn-in subtitles, ElevenLabs voice dubbing, S3 backup, Upload-Post distribution to socials. + +Additional dashboards in the same app: +- **AI Agent** (Claude Code skill — runs from terminal, auto-clips a folder of long verticals). +- **UGC Gallery** + **SaaSShorts** (AI talking-head ad generator using fal.ai Flux + Kling). +- **YouTube Studio** (Gemini-generated titles, thumbnails, descriptions for long-form YouTube). +- **Settings** (API key paste + **Brand Kit**). + +--- + +## 2. Current state (snapshot) + +| | | +| --- | --- | +| **Branch** | `chore/restructure-and-docs` (19 commits ahead of `main`) | +| **Revert point** | `git reset --hard pre-restructure-20260519-1526` | +| **Tests** | 62/62 green (`cd backend && pytest -m "not e2e"`, ~0.6s) | +| **OpenAPI baseline** | `backend/tests/snapshots/baseline.openapi.json` (35 endpoints) | +| **Docker** | All three services running | +| **Frontend URL** | http://localhost:3001 | +| **Backend API** | http://localhost:3002 | +| **Renderer** | http://localhost:3003 | +| **Uncommitted work** | Brand Kit feature — fully working, not yet committed. See section 9. 
| + +### What's uncommitted right now +``` + M .env.example (RENDER_SERVICE_URL comment) + M CLAUDE.md (port refs + path refs) + M README.md (port refs) + M backend/app/main.py (font endpoints + words_per_line on SubtitleRequest) + M backend/app/overlays/subtitles_generate.py (max_words param threaded through generate_srt) + M backend/tests/snapshots/baseline.openapi.json (regenerated for new routes) + M docker-compose.yml (port mappings, volume mount fix) + M frontend/src/App.jsx (BrandKit import + Settings layout widened) + M frontend/src/components/HookModal.jsx (uses useBrandKit hook) + M frontend/src/components/ResultCard.jsx (sends words_per_line on /api/subtitle) + M frontend/src/components/SubtitleModal.jsx (uses useBrandKit hook) +?? assets/fonts/user/ (user-uploaded fonts target dir) +?? frontend/src/components/BrandKit.jsx (NEW — main brand kit UI block) +?? frontend/src/components/BrandPreview.jsx (NEW — canvas live preview) +?? frontend/src/components/FontPicker.jsx (NEW — scrollable in-font font picker) +?? frontend/src/components/PositionGrid.jsx (NEW — 3x3 anchor selector) +?? frontend/src/lib/brandKit.js (NEW — localStorage + hook + helpers) +``` + +The user has NOT been asked to commit yet. Ask before committing. + +--- + +## 3. 
Top-level architecture + +``` +openshorts/ ← repo root +├── backend/ 🐍 Python FastAPI (Python 3.11 in Docker, ≥ 3.9 locally) +├── frontend/ ⚛️ React + Vite + Tailwind (Node 18 in Docker) +├── renderer/ 🎬 Remotion render microservice (TypeScript) +├── assets/ 🖼️ Committed static files (fonts + screenshots) +├── scripts/ 🛠️ Dev tooling (CLAUDE.md auto-updater + pre-commit hook installer) +├── output/ (gitignored runtime) +├── uploads/ (gitignored runtime) +├── docker-compose.yml +├── README.md +├── CLAUDE.md ← auto-managed sections regenerated by pre-commit hook +├── ROADMAP.md +├── LICENSE +└── .env.example ← template; copy to .env before running +``` + +This split follows **`fastapi/full-stack-fastapi-template`** (the official tiangolo template) — the canonical FastAPI+React monorepo layout. Each top-level folder is self-contained and has its own Dockerfile where relevant. Docker Compose orchestrates the three deployable services. + +--- + +## 4. Directory map (full) + +### `backend/` + +``` +backend/ +├── Dockerfile # Python 3.11-slim + ffmpeg + libgl1 + node + yt-dlp +├── pyproject.toml # package "app" found by setuptools +├── requirements.txt # runtime deps (FastAPI, faster-whisper, ultralytics…) +├── requirements-dev.txt # pytest, httpx, respx, vcrpy +└── app/ # the Python package (importable as `app`) + ├── __init__.py + ├── main.py # FastAPI app + 35 routes (was: root app.py, 2256 lines) + ├── cli.py # CLI: python -m app.cli -i input.mp4 -o output/ + │ + ├── core/ # Cross-cutting infra (job queue, store, api-key resolver) — scaffolded + ├── routes/ # Per-domain routers — scaffolded (routes still in main.py) + │ + ├── video/ # Core video processing + │ ├── pipeline.py # process_video_to_vertical (orchestrator + per-frame loop) + │ ├── tracking.py # SmoothedCameraman, SpeakerTracker (heart of the pipeline) + │ ├── scene_analysis.py # detect_scenes, analyze_scenes_strategy (TRACK vs GENERAL) + │ ├── reframing.py # create_general_frame (blurred-background composite)
+ │ └── ffmpeg.py # Single FFmpeg wrapper (NEW — migration in-flight; existing + │ # subprocess.run(ffmpeg) calls NOT yet migrated) + │ + ├── ml/ # AI inference + │ ├── detection.py # MediaPipe BlazeFace + YOLOv8n + │ ├── transcription.py # faster-whisper INT8 (CPU) + │ └── viral_extraction.py # Gemini 2.5 Flash + cost analysis + │ + ├── ingest/ + │ └── youtube.py # yt-dlp with bot-detection workarounds (tv_embed, android, mweb) + │ + ├── editing/ + │ ├── ai_filters.py # VideoEditor — Gemini-driven FFmpeg filter generation + │ └── prompts.py # build_ffmpeg_filter_prompt, build_effects_config_prompt + │ + ├── overlays/ + │ ├── hooks.py # Hook PNG generation (PIL) + FFmpeg burn-in + │ ├── subtitles_generate.py # SRT generation (words → grouped lines) + │ └── subtitles_render.py # subtitles filter (FFmpeg) + ASS color conversion + │ + ├── thumbnails/ + │ ├── titles.py # Gemini viral title generation + refinement loop + │ ├── images.py # Gemini multimodal image preview (thumbnail gen) + │ └── descriptions.py # YouTube description + chapter markers + │ + ├── saas/ + │ └── pipeline.py # SaaSShorts UGC generator (research → script → media → composite) + │ # 1474 lines — internal split deferred (see ROADMAP) + │ + ├── integrations/ + │ ├── s3.py # AWS S3 (clip backup + public gallery + presigned URLs) + │ └── elevenlabs.py # Dubbing API (30+ languages) + SUPPORTED_LANGUAGES dict + │ + ├── prompts/ # Externalized Gemini prompts (.md files) + ├── models/ # Pydantic schemas (scaffolded — current schemas inline in main.py) + ├── utils/ + │ └── filters.py # Shared FFmpeg filter helpers (sanitize, chain split, zoompan enforce) + │ + ├── audio/ # ROADMAP feature A (soundtracks + ducking) — scaffolded + ├── layouts/ # ROADMAP feature B (template layouts) — scaffolded + └── motion_graphics/ # ROADMAP feature C (animated overlays) — scaffolded + └── library/ # individual effect modules + +└── tests/ # Characterization suite (62 tests) + ├── conftest.py # sys.modules stubs (cv2, 
mediapipe, ultralytics, torch, etc.) + ├── unit/ # ~5 modules, pure-Python, fast + ├── api/test_openapi_contract.py # OpenAPI snapshot — pins all 35 routes + ├── e2e/test_pipeline_smoke.py # Skipped by default; needs real ffmpeg + fixtures + ├── snapshots/baseline.openapi.json # The contract — 35-endpoint surface + └── fixtures/ # Test videos (small, deterministic) +``` + +### `frontend/` + +``` +frontend/ +├── Dockerfile # node:18-alpine + Vite dev server +├── package.json +├── vite.config.js # Proxies /api, /videos, /thumbnails, /gallery, /video → backend:8000 +│ # Proxies /render → renderer:3100 (internal Docker network) +├── tailwind.config.js +├── index.html +└── src/ + ├── App.jsx # Tab switcher + state (jobs, api keys, etc.) + ├── Landing.jsx # Marketing landing page + ├── Legal.jsx # Terms & Privacy + ├── main.jsx, index.css, App.css, config.js + ├── lib/ + │ ├── brandKit.js # NEW — localStorage helpers + useBrandKit hook + wrapByWords + │ └── renderInBrowser.js + ├── components/ + │ ├── KeyInput.jsx # API-key paste for Gemini + │ ├── BrandKit.jsx # NEW — main brand kit UI + │ ├── BrandPreview.jsx # NEW — canvas live preview with chunk cycling + │ ├── FontPicker.jsx # NEW — scrollable font list (each name in its own font) + │ ├── PositionGrid.jsx # NEW — 3x3 anchor selector + │ ├── MediaInput.jsx # URL / file upload box + │ ├── ResultCard.jsx # Generated clip viewer + │ ├── HookModal.jsx # Hook overlay editor (pre-fills from brand kit) + │ ├── SubtitleModal.jsx # Subtitle burn-in editor (pre-fills from brand kit) + │ ├── TranslateModal.jsx # ElevenLabs dubbing + │ ├── ThumbnailStudio.jsx # YouTube Studio panel + │ ├── SaaShortsTab.jsx # SaaS UGC pipeline UI + │ ├── UGCGallery.jsx # UGC video browser + │ ├── Gallery.jsx, GalleryCard.jsx + │ ├── RemotionPreview.jsx # Remotion in-browser preview wrapper + │ ├── ProcessingAnimation.jsx + │ └── ScheduleWeekModal.jsx + └── remotion/ # In-browser Remotion compositions +``` + +### `renderer/` + +``` +renderer/ +├── 
service/ # Standalone TypeScript microservice +│ ├── Dockerfile # node:18 + Chromium + ffmpeg +│ ├── package.json +│ ├── tsconfig.json +│ └── src/ +│ ├── server.ts # HTTP server on 3100 +│ ├── bundle.ts # Bundles Remotion compositions +│ └── render-worker.ts # Headless Chromium renderer +└── compositions/ # Remotion compositions (TSX) + ├── package.json + └── src/ # All composition files +``` + +### `assets/`, `scripts/`, runtime dirs + +``` +assets/ +├── fonts/ +│ ├── NotoSerif-Bold.ttf # Used by hook overlays +│ └── user/ # User-uploaded fonts (brand kit, persistent) +└── screenshots/ # For README.md only + +scripts/ +├── update_claude_md.py # AST-based; regenerates 3 auto-managed CLAUDE.md sections +└── install_hooks.sh # One-time pre-commit hook installer + +output/, uploads/ # Runtime (gitignored) +``` + +--- + +## 5. Tech stack + +### Backend (Python 3.11) +- **FastAPI** + Uvicorn — async API +- **faster-whisper** — local speech-to-text (CPU INT8) +- **MediaPipe BlazeFace** + **YOLOv8n** (ultralytics) — face/person detection +- **PySceneDetect** — scene boundary detection +- **OpenCV** — frame I/O +- **yt-dlp** — YouTube ingest +- **FFmpeg** (system binary) — encode/overlay/burn/mux +- **httpx**, **boto3**, **google-genai** — API clients +- **Pillow** — hook image rendering + +### Frontend (Node 18) +- **React 18**, **Vite 4**, **Tailwind CSS 3.4** +- **lucide-react** for icons +- **Remotion** compositions (in-browser preview + server-side render) + +### External services (paid APIs; user supplies keys) +| Service | Used for | Where | +| --- | --- | --- | +| **Google Gemini 2.5 Flash** | Viral moment extraction, video effects, thumbnail titles, descriptions | `app.ml.viral_extraction`, `app.editing`, `app.thumbnails` | +| **ElevenLabs Dubbing** | Voice translation 30+ languages | `app.integrations.elevenlabs` | +| **fal.ai** (Flux + Kling/Hailuo) | SaaSShorts UGC actor generation | `app.saas.pipeline` | +| **Upload-Post** | Social media distribution | 
`app.main` (`/api/social/*`) | +| **AWS S3** | Clip backup + public gallery | `app.integrations.s3` | + +### Cost story +- **All transcription / face tracking / scene detection / video processing runs LOCALLY** — no API cost. +- **Gemini is the only required paid API** (has free tier; personal use likely $0). +- Everything else (ElevenLabs / fal.ai / Upload-Post / S3) is opt-in. + +--- + +## 6. APIs + +### Server-side env vars (`.env`) +Only one is required: `GEMINI_API_KEY`. See `.env.example` for the full template. + +```bash +# Required +GEMINI_API_KEY= + +# Optional +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= +AWS_REGION=eu-west-3 +AWS_S3_BUCKET= +AWS_S3_PUBLIC_BUCKET= +YOUTUBE_COOKIES= +DISABLE_YOUTUBE_URL=false +RENDER_SERVICE_URL=http://renderer:3100 # Docker INTERNAL URL — do NOT change to localhost:3003 +MAX_CONCURRENT_JOBS=5 +VITE_API_URL=http://localhost:3002 +``` + +### Client-side keys (NOT in .env) +- `ELEVENLABS_API_KEY`, `UPLOAD_POST_API_KEY`, `FAL_KEY` — these are pasted into the **dashboard UI**, encrypted in browser `localStorage`, and sent as HTTP headers per request (`X-ElevenLabs-Key`, `X-Upload-Post-Key`, `X-Fal-Key`). The backend **never stores them**. 
+ +### Internal HTTP surface (35 endpoints; full list in `backend/tests/snapshots/baseline.openapi.json`) +Key routes: +- `POST /api/process` — submit a video for processing +- `GET /api/status/{job_id}` — poll status +- `POST /api/edit` — apply AI-generated FFmpeg filters +- `POST /api/subtitle` — generate + burn subtitles (accepts `words_per_line` from brand kit) +- `POST /api/hook` — burn text hook PNG +- `POST /api/translate` — ElevenLabs dubbing +- `POST /api/effects/generate`, `POST /api/render/{render_id}` — Remotion render pipeline +- `POST /api/thumbnail/*` — YouTube thumbnail workflow +- `POST /api/saasshorts/*` — SaaS UGC pipeline +- `POST /api/social/post` — Upload-Post distribution +- **NEW** `GET /api/fonts` — list system + bundled + user-uploaded fonts +- **NEW** `POST /api/fonts/upload` — multipart `.ttf/.otf/.woff/.woff2` upload (10 MB cap) +- **NEW** `GET /api/fonts/file/{name}` — serve bundled font +- **NEW** `GET /api/fonts/file/user/{name}` — serve user-uploaded font + +--- + +## 7. Processing pipeline (11 stages) + +1. **Ingest** — `app/ingest/youtube.py:download_youtube_video()` or local upload +2. **Transcribe** — `app/ml/transcription.py:transcribe_video()` (faster-whisper, word timestamps) +3. **Scene-detect** — `app/video/scene_analysis.py:detect_scenes()` +4. **Viral extraction** — `app/ml/viral_extraction.py:get_viral_clips()` (Gemini picks 3–15 clips, 15–60 s each) +5. **Cut clips** — FFmpeg `-ss`/`-to` +6. **Strategy** — `app/video/scene_analysis.py:analyze_scenes_strategy()` (TRACK vs GENERAL per scene) +7. **Reframe** — `app/video/pipeline.py:process_video_to_vertical()` (per-frame loop with `SmoothedCameraman`) +8. **Effects** (optional) — `app/editing/ai_filters.py:VideoEditor` +9. **Hooks + subtitles** (optional) — `app/overlays/*` +10. **Translate** (optional) — `app/integrations/elevenlabs.py:translate_video()` +11. **Backup + distribute** — S3 + Upload-Post + +--- + +## 8. 
History of work done in this session + +Single chronological narrative — everything that landed since the user started the session. + +### Phase 0 — Test safety net (commit `2d7eff5`) +Built a characterization test suite BEFORE touching code: +- `tests/conftest.py` with `sys.modules` stubs for heavy ML deps (cv2, mediapipe, ultralytics, torch, yt_dlp, faster_whisper, google.genai, boto3) so unit tests run in milliseconds without needing GPUs or network. +- `tests/unit/` — `test_tracking.py`, `test_filter_sanitization.py`, `test_srt_generation.py`, `test_hook_image.py`, `test_translate_languages.py`. +- `tests/api/test_openapi_contract.py` with the 32-endpoint OpenAPI snapshot (`baseline.openapi.json`). +- `tests/e2e/test_pipeline_smoke.py` — slow canary skipped by default. +- Tagged the pre-restructure state as `pre-restructure-20260519-1526`. Branched to `chore/restructure-and-docs`. + +### Phase 1 — First restructure (commits `d7c5a58` … `84310c7`, 11 commits) +Created an `openshorts/` Python package and moved every root-level monolith into it incrementally: +- `s3_uploader.py` → `openshorts/integrations/s3.py` +- `translate.py` → `openshorts/integrations/elevenlabs.py` +- `hooks.py` → `openshorts/overlays/hooks.py` +- `subtitles.py` → split into `openshorts/overlays/subtitles_{generate,render}.py` +- `editor.py` → `openshorts/editing/ai_filters.py` + `openshorts/utils/filters.py` (shared helpers) + `openshorts/editing/prompts.py` +- `thumbnail.py` → 3 files under `openshorts/thumbnails/` +- `main.py` → split into `video/{pipeline,tracking,scene_analysis,reframing}.py` + `ml/{detection,transcription,viral_extraction}.py` + `ingest/youtube.py` +- `saasshorts.py` → `openshorts/saas/pipeline.py` (single-file move; internal split deferred) +- `openshorts/app.py` — thin re-export of root `app.py`'s FastAPI instance +- Each old root file became a **shim** that re-exported the new paths for backwards compat. 
+- Added `openshorts/video/ffmpeg.py` wrapper scaffold (migration of existing `subprocess.run(['ffmpeg', ...])` calls deferred). +- Updated `Dockerfile` `CMD` to `uvicorn openshorts.app:app`. + +### Phase 3 — `.env.example` (commit `6496d69`) +Added every env var the code actually reads. Clarified which keys are server-side vs client-side (headers). + +### Phase 4 — Auto-updater tooling (commit `a32e8e5`) +- `scripts/update_claude_md.py` — AST-based parser that regenerates 3 sections of CLAUDE.md between markers (`<!-- AUTO:REPO-MAP:START/END -->`, `<!-- AUTO:MODULE-MAP:START/END -->`, `<!-- AUTO:ENV:START/END -->`). Exits non-zero if any `.py` under the package lacks a module docstring (enforces the "every module has a one-liner" rule). +- `scripts/install_hooks.sh` — wires up the pre-commit hook. +- `.pre-commit-config.yaml` — runs the updater on every commit. + +### Phase 2 — CLAUDE.md rewrite (commits `726bfd3`, `9e68944`) +- Full rewrite with structured sections: Project, Quick start, Where things go (decision table), Repo layout (auto-managed), Backend package, Module map (auto-managed), Processing pipeline, API surface, Environment (auto-managed), Conventions, Pointers. +- Sub-`CLAUDE.md` stubs at directory boundaries: `video/`, `layouts/`, `motion_graphics/`, `audio/`, `prompts/`. + +### Phase 5 — ROADMAP.md (commit `1dd4b9a`) +Designs for three future features in shipping order: +1. **Motion graphics** (animated overlays + multi-effect compositor) — ships first because its compositor is the prerequisite for ducking audio. +2. **Audio soundtracks + ducking** — uses Whisper word timings or FFmpeg `silencedetect` to duck music during speech. +3. **Layout templates** — abstract `Layout` base class; `VerticalPanoramaLayout` wraps current behavior; `EducationalLayout` uses two cameramen (top: source crop, bottom: presenter headshot). 
+ +Also documents deferred refactors: full router split, FFmpeg-wrapper migration, saasshorts internal split, `core/job_store` + `core/api_keys` extraction. + +### Phase 6 — Second restructure (commit `55f0ef1`, the big one) +User pushed back on the leftover root shims and the unclear backend/frontend split. Did the proper monorepo split: +- `openshorts/` (Python package) → `backend/app/` +- Root `app.py` (2256 lines) → `backend/app/main.py` (shim imports rewritten to `app.integrations.s3`, `app.editing.ai_filters`, etc.) +- Root `main.py` CLI → `backend/app/cli.py` +- 9 root `.py` shims + 3 `verify_*.py` scripts → **deleted** +- `tests/` → `backend/tests/` +- `pyproject.toml`, `Dockerfile`, `requirements*.txt` → `backend/` +- `dashboard/` → `frontend/` +- `render-service/` + `remotion/` → `renderer/service/` + `renderer/compositions/` +- `fonts/` + `screenshots/` → `assets/fonts/` + `assets/screenshots/` +- `hooks.py` font path now auto-resolves by walking up the directory tree to find `assets/fonts/` +- `docker-compose.yml` updated for all new paths (entrypoint `app.main:app`) +- `scripts/update_claude_md.py` updated for new layout +- CLAUDE.md fully rewritten with the new layout + +### Phase 7 — Docker + port mapping +- Switched host ports from `5175/8000/3100` → `3001/3002/3003` (consecutive, easier to remember). +- **Container internal ports unchanged** (still 5173/8000/3100). Only host mappings changed. +- Vite proxy config (`vite.config.js`) untouched — it uses Docker internal service names (`http://backend:8000`, `http://renderer:3100`), which is correct. +- Updated `.env`, `.env.example`, README, CLAUDE.md port references. + +### Phase 8 — Brand Kit (UNCOMMITTED — section 9) + +--- + +## 9. Brand Kit feature (latest, UNCOMMITTED) + +The user wanted a Hormozi-style "brand kit" so all subtitle/hook/effect text shares colors, fonts, and styling. Built across three iterations. 
+ +### Architecture + +``` +frontend/src/lib/brandKit.js ← localStorage CRUD + useBrandKit hook + helpers +frontend/src/components/BrandKit.jsx ← the main UI block (rendered in Settings tab) +frontend/src/components/BrandPreview.jsx ← canvas live preview with chunk cycling +frontend/src/components/FontPicker.jsx ← scrollable font list (each name in its own font) +frontend/src/components/PositionGrid.jsx ← 3x3 anchor selector +``` + +### Storage shape (in browser `localStorage` under key `openshorts.brandKit.v2`) + +```js +{ + colors: [ + { name: 'Primary', hex: '#FFFFFF' }, + { name: 'Accent', hex: '#FFD60A' }, + { name: 'Stroke', hex: '#000000' }, + // ...user-added colors + ], + font: { family: 'Inter', source: 'system' | 'bundled' | 'user', url: null | '/api/fonts/file/...' }, + previewText: 'Stop scrolling and watch this insane clip…', + styles: { + '9:16': { size, strokeWidth, textColor, strokeColor, position, wordsPerLine }, + '16:9': { size, strokeWidth, textColor, strokeColor, position, wordsPerLine }, + }, +} +``` + +Auto-migrates from the legacy v1 shape (`{ style: {...} }`) if found. + +### Position uses a 3×3 anchor grid + +Values: `top-left`, `top-center`, `top-right`, `middle-left`, `middle-center`, `middle-right`, `bottom-left`, `bottom-center`, `bottom-right`. Standard Figma pattern. + +### Per-aspect-ratio settings + +**Shared across ratios:** the color palette + font family (brand identity). +**Per-ratio:** size, stroke width, text/stroke color selection (`textColor`, `strokeColor`), position, words-per-line (layout-specific). + +Default values: +- **9:16**: size 72 px, stroke 6 px, bottom-center, 2 words/line (Hormozi style) +- **16:9**: size 48 px, stroke 4 px, bottom-center, 10 words/line (full sentences) + +### Live preview behavior (KEY UX) + +The preview shows **one chunk on screen at a time**, cycling every 1.5 s — exactly like real subtitle burn-in flashes one block on screen, not all of them stacked. 
+ +Below the canvas: +- Pause/Play button +- One dot per chunk (current one highlighted wider) +- `N/total` counter +- Clicking a dot jumps to that chunk and pauses + +The preview text is editable. Word count is shown. Reset button restores default. + +### Font upload flow + +- Drag-drop `.ttf/.otf/.woff/.woff2` into the FontPicker dropdown (10 MB cap). +- POST to `/api/fonts/upload` → saves under `assets/fonts/user/` (persistent across container restarts via volume mount). +- `GET /api/fonts` returns catalog: system (curated 9 names: Inter, Roboto, Arial, …), bundled (NotoSerif from `assets/fonts/`), user (anything in `user/`). +- `GET /api/fonts/file/{name}` serves bundled font. +- `GET /api/fonts/file/user/{name}` serves user-uploaded font. +- Browser registers them as `@font-face` via `ensureFontLoaded()` so the picker can render each name in its own typeface. + +### How brand kit flows into actual output + +1. **SubtitleModal** (`frontend/src/components/SubtitleModal.jsx`) uses `useBrandKit()` hook. On open, pre-fills `position`, `fontSize`, `fontName`, `fontColor`, `borderColor`, `borderWidth`, `wordsPerLine` from the live brand kit (the `'9:16'` block — all subtitle output is currently vertical). +2. **HookModal** (`frontend/src/components/HookModal.jsx`) does the same for hook overlay defaults. +3. On submit, `ResultCard.handleSubtitle` posts the full config to `POST /api/subtitle` including `words_per_line`. +4. Backend `SubtitleRequest` Pydantic model (in `backend/app/main.py`) accepts `words_per_line: Optional[int]`. +5. Backend threads it through to `generate_srt(transcript, …, max_words=N)` in `backend/app/overlays/subtitles_generate.py`. If `max_words` is set, SRT blocks are grouped by word count instead of character count (the `max_duration` cap can still end a block early); if `words_per_line` is omitted or 0, the character-count heuristic is used. +6. FFmpeg subtitle burn-in renders the SRT — N words flash on screen at a time. 
+ +### Live update mechanism + +`brandKit.js` exposes a `useBrandKit()` React hook that subscribes to: +- `brandKit:changed` custom event (fired on every `saveBrandKit()`) +- `storage` event (fired when localStorage changes in another tab) + +This means: when the user edits the brand kit, any open SubtitleModal/HookModal sees the new values immediately. No restart needed. + +### What's currently NOT wired (deferred — section 12) + +- **Hook overlay backend** doesn't yet accept brand kit text/stroke colors; HookModal uses categorical S/M/L sizes only. To fully respect brand kit, `POST /api/hook` would need new fields (`font_family`, `text_color`, `stroke_color`, `stroke_width`) and `app/overlays/hooks.py:create_hook_image()` would need to use them. Currently it uses the bundled NotoSerif-Bold + hardcoded styling. +- **AI effect text** (Remotion path) doesn't yet honor brand kit. Effect-config generation in Gemini prompts is unaware of the user's brand. Plumbing the brand kit into `app/editing/prompts.py` would fix this. +- **HookModal** maps brand kit position to its simpler top/middle/bottom picker — fine but lossy. A full 9-anchor picker in HookModal would respect horizontal alignment too. + +--- + +## 10. Tests + +```bash +cd backend +pytest -m "not e2e" -q # 62 tests, ~0.6s +pytest -m e2e -q # slow smoke test, needs real ffmpeg + tiny fixture video +``` + +Layout: +- `backend/tests/unit/` — pure-Python, fast (5 modules) +- `backend/tests/api/test_openapi_contract.py` — pins the 35-endpoint contract via JSON snapshot +- `backend/tests/e2e/test_pipeline_smoke.py` — full pipeline end-to-end (skipped by default) + +**Snapshot baseline** lives at `backend/tests/snapshots/baseline.openapi.json`. 
If you intentionally change the API surface, delete + regenerate: +```bash +cd backend +rm tests/snapshots/baseline.openapi.json +pytest tests/api/test_openapi_contract.py # will fail on first run, dropping new current.openapi.json +cp tests/snapshots/current.openapi.json tests/snapshots/baseline.openapi.json +rm tests/snapshots/current.openapi.json +pytest # should pass now +``` + +**conftest.py** uses `sys.modules` stubs for heavy ML deps. That means the local venv does NOT need torch/mediapipe/ultralytics installed — the tests run anywhere Python+pytest+httpx+PIL+respx are available. + +--- + +## 11. Running the stack + +### Full stack (recommended) +```bash +docker compose up --build +# Frontend (open this): http://localhost:3001 +# Backend API: http://localhost:3002 +# Renderer: http://localhost:3003 +``` + +### Backend only (local Python 3.11) +```bash +cd backend +pip install -r requirements.txt -r requirements-dev.txt +pip install -e . +uvicorn app.main:app --host 0.0.0.0 --port 3002 +``` + +### Frontend only (Node 18) +```bash +cd frontend +npm install +npm run dev +# Vite proxy will fail unless backend is also running +``` + +### Pre-commit hook +```bash +bash scripts/install_hooks.sh +# Regenerates CLAUDE.md auto-sections on every commit; fails commit if any +# app/*.py is missing a module docstring +``` + +### .env setup +```bash +cp .env.example .env +# Edit .env and set GEMINI_API_KEY (only required key) +# Other keys (ElevenLabs, fal.ai, Upload-Post) go in the dashboard UI, not .env +``` + +--- + +## 12. Deferred work / roadmap + +Logged in `ROADMAP.md`. Quick summary: + +### Brand Kit polish +- Wire brand kit text/stroke color into `POST /api/hook` + `app/overlays/hooks.py:create_hook_image()`. +- Plumb brand kit into AI-effect generation (`app/editing/prompts.py`). +- Add a full 9-anchor picker to HookModal (currently uses simpler top/middle/bottom). 
+ +### Three feature designs (in shipping order) + +#### Feature C — Motion Graphics Library (ships first) +- New `app/motion_graphics/base.py:MotionGraphicEffect(ABC)` +- `app/motion_graphics/compositor.py:MotionGraphicsCompositor` — batches multiple effects into a single `filter_complex` so the video re-encodes once, not once per effect. +- Initial library: `LowerThirdsEffect`, `CalloutEffect`, `AnimatedEmojiEffect`, `ProgressBarEffect`. +- API: `GET /api/motion-graphics/library`, `POST /api/motion-graphics/render`. + +#### Feature A — Background Soundtracks + Ducking +- New `app/audio/mixer.py:mix_audio_tracks()` — uses Whisper word timings OR FFmpeg `silencedetect` to compute speech intervals; ducks music by `ducking_db` dB during speech. +- `app/audio/library.py` — reads `assets/music/manifest.json` (genre/mood/length per track). No external API in v1. +- `app/audio/cues.py` — optional Gemini-based SFX moment detection. Prompt at `app/prompts/sfx_cues.md`. +- API: `POST /api/audio/apply` with `{ job_id, clip_index, music_track_id, sfx_cues, ducking_db }`. + +#### Feature B — Layout Templates (biggest) +- New `app/layouts/base.py:Layout(ABC)` with a single `render_frame(frame, detections, frame_number)` method. +- `VerticalPanoramaLayout` wraps current TRACK/GENERAL behavior. +- `EducationalLayout` — two cameramen: top half = source content (screencast-style), bottom half = presenter headshot. +- `SideBySideLayout`, `PictureInPictureLayout` — stubs for future expansion. +- Frame loop in `app/video/pipeline.py` becomes `layout.render_frame(...)` — polymorphic call replaces today's inline TRACK/GENERAL branching. +- Job request gains `layout: "panorama" | "educational"` (default panorama). + +### Deferred refactors +- Full router split: 2256-line `backend/app/main.py` → 11 router modules under `backend/app/routes/` + a `create_app()` factory. +- FFmpeg wrapper migration: every `subprocess.run(['ffmpeg', ...])` call → `app.video.ffmpeg`. 
+- SaaSShorts internal split: `backend/app/saas/pipeline.py` (1474 lines) → research/scripting/media/compositing/pipeline. +- Extract `app/core/job_store.py` + `app/core/api_keys.py` from `main.py` alongside the router split. +- Frontend restructure (App.jsx is large; centralize an api client) — deliberately out of scope this round. + +--- + +## 13. Conventions + +1. **Single FFmpeg wrapper**: `app/video/ffmpeg.py` exists but most callers still use `subprocess.run(['ffmpeg', ...])` directly. New code MUST use the wrapper; migration of existing code is incremental. +2. **API keys via headers, not env**: client-side keys (Gemini override, ElevenLabs, Upload-Post, fal.ai) arrive on each request as `X-...-Key`. A future `app/core/api_keys.py` will be the only allowed reader. +3. **Prompts as files**: new Gemini prompts go in `app/prompts/<name>.md`. Inline prompts may stay in `app/editing/prompts.py` for tightly-coupled cases. +4. **Every module starts with a docstring**: the pre-commit hook (`scripts/update_claude_md.py`) fails the commit if any `.py` under `backend/app/` lacks one. Single line. +5. **Tests first**: keep `pytest -m "not e2e"` 100% green. The OpenAPI snapshot in `tests/snapshots/baseline.openapi.json` pins the public surface. +6. **No new global dicts in routers**: today, `app/main.py` still owns `jobs`, `thumbnail_sessions`, `publish_jobs`, `saas_jobs`. Centralize into `app/core/job_store.py` (planned). + +--- + +## 14. Known gotchas + +1. **Async event loop warning in tests**: `Queue dispatch error: Task ... got Future attached to a different loop` — appears once in the API contract test. Cosmetic; the test still passes (62/62). Caused by the FastAPI startup event spawning the queue worker; cleanup is missed at teardown. Not blocking. +2. **First Docker build is slow** (5–10 min): backend image is 10.7 GB because of torch + ultralytics + mediapipe. After the first build, cached layers make subsequent rebuilds quick. +3. 
**`RENDER_SERVICE_URL=http://renderer:3100`** is the **Docker internal** URL. Do not change to `localhost:3003` unless you're running the backend OUTSIDE Docker. The renderer container's internal port is still 3100; only the HOST mapping changed to 3003. +4. **Vite proxy uses internal Docker names** (`http://backend:8000`, `http://renderer:3100`) — these are correct as-is. +5. **`assets/fonts/user/`** is volume-mounted into the backend container for read+write. Uploaded fonts survive container restarts. +6. **The `_bk` module-level snapshot pattern is GONE** — both SubtitleModal and HookModal now use the `useBrandKit()` hook, which subscribes to live changes. If you re-introduce module-level reads, the modals will go stale. +7. **OpenAPI snapshot drift**: any new endpoint or Pydantic field change requires regenerating `baseline.openapi.json` (see section 10). + +--- + +## 15. Useful commands cheatsheet + +```bash +# === Status === +git log --oneline -10 +docker compose ps +curl http://localhost:3002/api/config # backend health + +# === Tests === +cd backend && pytest -m "not e2e" -q # fast suite +cd backend && pytest -v # everything + +# === Restart only one service === +docker compose restart backend # picks up code changes via volume mount +docker compose restart frontend +docker compose restart renderer + +# === Full rebuild === +docker compose down +docker compose up --build + +# === Disk usage === +docker system df +docker system prune -af # nuclear cleanup + +# === Regenerate CLAUDE.md === +python3 scripts/update_claude_md.py # idempotent + +# === API surface inspection === +curl http://localhost:3002/openapi.json | jq '.paths | keys' + +# === Revert everything === +git reset --hard pre-restructure-20260519-1526 # nuclear undo +``` + +--- + +## 16. What to tell the next agent + +If you're handing this to a new LLM: + +1. **The codebase is in a healthy state.** Tests pass. Stack runs. +2. **The Brand Kit work is uncommitted.** Ask the user before committing. 
Once approved, suggested commit message: + ``` + feat(brand-kit): brand kit settings with 9-anchor positioning, per-ratio styling, font upload, and live chunk-cycling preview + ``` +3. **Don't touch the FFmpeg subprocess calls** unless you're doing the full wrapper migration. There are dozens of callers and they currently all work. +4. **The OpenAPI snapshot test will fail** the moment you add or remove any route. See section 10 for regen. +5. **Read CLAUDE.md and ROADMAP.md** — they're current. +6. **The user's preferred next steps**, in priority order: + a. Polish Brand Kit (wire into hooks + AI effects + HookModal full 9-anchor). + b. Ship motion graphics (feature C) — its compositor is the prerequisite for feature A. + c. Ship soundtracks + ducking (feature A). + d. Ship layout templates (feature B). +7. **Don't merge to `main` until the user explicitly approves** — the branch has 19 commits + the brand kit work; they want to review. + +--- + +*Generated for handoff. Last updated: 2026-05-19. Branch: `chore/restructure-and-docs`.* diff --git a/README.md b/README.md index 6b14e2a5..4b1f5ed1 100644 --- a/README.md +++ b/README.md @@ -194,7 +194,7 @@ docker compose up --build ``` ### 4. Open Dashboard -Navigate to **`http://localhost:5175`** +Navigate to **`http://localhost:3001`** 1. Go to **Settings** and enter your API keys (Gemini, fal.ai, ElevenLabs, Upload-Post) 2. **Clip Generator**: Upload a long-form video to generate viral shorts diff --git a/assets/fonts/user/.gitkeep b/assets/fonts/user/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/backend/app/main.py b/backend/app/main.py index d38fc7ec..0c904a33 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -566,6 +566,10 @@ class SubtitleRequest(BaseModel): border_width: int = 2 bg_color: str = "#000000" bg_opacity: float = 0.0 + # Max words per subtitle block (None or 0 = fall back to character-based grouping). From brand kit. 
+ words_per_line: Optional[int] = None + # Text case: "original" | "upper" | "lower". 
From brand kit. + text_case: Optional[str] = None input_filename: Optional[str] = None @@ -812,15 +816,20 @@ async def add_subtitles(req: SubtitleRequest): # Check if this is a dubbed video - if so, transcribe it fresh is_dubbed = filename.startswith("translated_") + # Brand-kit words-per-line: if provided, cap line length to N words. + max_words = req.words_per_line if (req.words_per_line and req.words_per_line > 0) else None + # Brand-kit text case: 'original' (default) | 'upper' | 'lower'. + text_case = req.text_case or 'original' + if is_dubbed: print(f"🎙️ Dubbed video detected, transcribing audio for subtitles...") def run_transcribe_srt(): - return generate_srt_from_video(input_path, srt_path) + return generate_srt_from_video(input_path, srt_path, max_words=max_words, text_case=text_case) loop = asyncio.get_event_loop() success = await loop.run_in_executor(None, run_transcribe_srt) else: - success = generate_srt(transcript, clip_data['start'], clip_data['end'], srt_path) + success = generate_srt(transcript, clip_data['start'], clip_data['end'], srt_path, max_words=max_words, text_case=text_case) if not success: raise HTTPException(status_code=400, detail="No words found for this clip range.") @@ -2258,3 +2267,119 @@ async def saasshorts_voices( ], "source": "defaults", } + + +# ============================================================================ +# Brand Kit: fonts upload + listing + serving +# ============================================================================ + +from fastapi.responses import FileResponse +from pathlib import Path as _Path + +# Resolve assets/fonts relative to repo root (walk up from this file) +def _resolve_assets_fonts_dir() -> _Path: + for parent in _Path(__file__).resolve().parents: + candidate = parent / "assets" / "fonts" + if candidate.is_dir(): + return candidate + raise RuntimeError("assets/fonts directory not found") + + +_ASSETS_FONTS = _resolve_assets_fonts_dir() +_USER_FONTS = _ASSETS_FONTS / "user" 
+_USER_FONTS.mkdir(parents=True, exist_ok=True) + +_FONT_EXTS = {".ttf", ".otf", ".woff", ".woff2"} +_FONT_MIME = { + ".ttf": "font/ttf", + ".otf": "font/otf", + ".woff": "font/woff", + ".woff2": "font/woff2", +} + + +@app.get("/api/fonts") +async def list_fonts(): + """Return the catalog of available fonts (system + user-uploaded).""" + fonts: List[dict] = [] + + # System fonts ship with Docker image; expose a sensible curated list so the + # UI doesn't have to enumerate the entire filesystem. + system_default = [ + "Inter", "Roboto", "Arial", "Verdana", "Tahoma", + "Times New Roman", "Georgia", "Courier New", "Impact", + ] + for name in system_default: + fonts.append({"name": name, "source": "system", "url": None}) + + # NotoSerif-Bold ships in assets/fonts (used by hook overlays). + for f in sorted(_ASSETS_FONTS.glob("*.ttf")): + if f.parent == _USER_FONTS: + continue + fonts.append({ + "name": f.stem, + "source": "bundled", + "url": f"/api/fonts/file/{f.name}", + }) + + # User-uploaded fonts. + for ext in _FONT_EXTS: + for f in sorted(_USER_FONTS.glob(f"*{ext}")): + fonts.append({ + "name": f.stem, + "source": "user", + "url": f"/api/fonts/file/user/{f.name}", + }) + + return {"fonts": fonts} + + +@app.post("/api/fonts/upload") +async def upload_font(file: UploadFile = File(...)): + """Save a user-uploaded font under assets/fonts/user/.""" + if not file.filename: + raise HTTPException(status_code=400, detail="No filename") + suffix = _Path(file.filename).suffix.lower() + if suffix not in _FONT_EXTS: + raise HTTPException( + status_code=400, + detail=f"Unsupported font format {suffix}. 
Use .ttf, .otf, .woff, or .woff2.", + ) + + # Basic name sanitization + safe_name = _Path(file.filename).name.replace("/", "_").replace("\\", "_") + target = _USER_FONTS / safe_name + + contents = await file.read() + if len(contents) > 10 * 1024 * 1024: # 10 MB cap + raise HTTPException(status_code=413, detail="Font file too large (max 10 MB)") + target.write_bytes(contents) + + return { + "name": target.stem, + "source": "user", + "url": f"/api/fonts/file/user/{target.name}", + "size": len(contents), + } + + +@app.get("/api/fonts/file/{name}") +async def serve_bundled_font(name: str): + """Serve a bundled font (in assets/fonts/, not under user/).""" + safe = _Path(name).name # strip any path traversal + target = _ASSETS_FONTS / safe + if not target.is_file() or target.parent != _ASSETS_FONTS: + raise HTTPException(status_code=404, detail="Font not found") + mime = _FONT_MIME.get(target.suffix.lower(), "application/octet-stream") + return FileResponse(str(target), media_type=mime) + + +@app.get("/api/fonts/file/user/{name}") +async def serve_user_font(name: str): + """Serve a user-uploaded font from assets/fonts/user/.""" + safe = _Path(name).name + target = _USER_FONTS / safe + if not target.is_file() or target.parent != _USER_FONTS: + raise HTTPException(status_code=404, detail="Font not found") + mime = _FONT_MIME.get(target.suffix.lower(), "application/octet-stream") + return FileResponse(str(target), media_type=mime) diff --git a/backend/app/overlays/subtitles_generate.py b/backend/app/overlays/subtitles_generate.py index a4601de7..080f6b37 100644 --- a/backend/app/overlays/subtitles_generate.py +++ b/backend/app/overlays/subtitles_generate.py @@ -43,7 +43,16 @@ def transcribe_audio(video_path): return transcript -def generate_srt_from_video(video_path, output_path, max_chars=20, max_duration=2.0): +def _apply_text_case(text: str, text_case: str) -> str: + """Brand-kit case transform: 'upper' / 'lower' / anything else = original.""" + if text_case == "upper": 
+ return text.upper() + if text_case == "lower": + return text.lower() + return text + + +def generate_srt_from_video(video_path, output_path, max_chars=20, max_duration=2.0, max_words=None, text_case="original"): """ Transcribe a video and generate SRT directly. Used for dubbed videos that don't have a pre-existing transcript. @@ -58,13 +67,17 @@ def generate_srt_from_video(video_path, output_path, max_chars=20, max_duration= duration = frame_count / fps if fps else 0 cap.release() - return generate_srt(transcript, 0, duration, output_path, max_chars, max_duration) + return generate_srt(transcript, 0, duration, output_path, max_chars, max_duration, max_words=max_words, text_case=text_case) -def generate_srt(transcript, clip_start, clip_end, output_path, max_chars=20, max_duration=2.0): +def generate_srt(transcript, clip_start, clip_end, output_path, max_chars=20, max_duration=2.0, max_words=None, text_case="original"): """ Generates an SRT file from the transcript for a specific time range. Groups words into short lines suitable for vertical video. + + ``max_words`` (optional) overrides character-based grouping with a fixed + words-per-line cap — set from the brand kit. None = use char heuristic only. + ``text_case`` applies the brand-kit casing: "original" | "upper" | "lower". """ words = [] @@ -99,13 +112,19 @@ def generate_srt(transcript, clip_start, clip_end, output_path, max_chars=20, ma current_text_len = sum(len(w['word']) + 1 for w in current_block) duration = end - block_start - if current_text_len + len(word['word']) > max_chars or duration > max_duration: + # Honor explicit words-per-line cap (brand kit) if set; falls back + # to character heuristic otherwise. 
+ words_exceeded = max_words is not None and len(current_block) >= max_words + chars_exceeded = max_words is None and (current_text_len + len(word['word']) > max_chars) + + if words_exceeded or chars_exceeded or duration > max_duration: # Finalize current block # End time of block is start of this word (gap) or end of last word? # Usually end of last word. block_end = current_block[-1]['end'] - clip_start text = " ".join([w['word'] for w in current_block]).strip() + text = _apply_text_case(text, text_case) srt_content += format_srt_block(index, block_start, block_end, text) index += 1 @@ -118,6 +137,7 @@ def generate_srt(transcript, clip_start, clip_end, output_path, max_chars=20, ma if current_block: block_end = current_block[-1]['end'] - clip_start text = " ".join([w['word'] for w in current_block]).strip() + text = _apply_text_case(text, text_case) srt_content += format_srt_block(index, block_start, block_end, text) with open(output_path, 'w', encoding='utf-8') as f: diff --git a/backend/tests/snapshots/baseline.openapi.json b/backend/tests/snapshots/baseline.openapi.json index b9424deb..736b16b7 100644 --- a/backend/tests/snapshots/baseline.openapi.json +++ b/backend/tests/snapshots/baseline.openapi.json @@ -214,6 +214,20 @@ "title": "Body_thumbnail_upload_api_thumbnail_upload_post", "type": "object" }, + "Body_upload_font_api_fonts_upload_post": { + "properties": { + "file": { + "format": "binary", + "title": "File", + "type": "string" + } + }, + "required": [ + "file" + ], + "title": "Body_upload_font_api_fonts_upload_post", + "type": "object" + }, "EditRequest": { "properties": { "api_key": { @@ -711,6 +725,28 @@ "default": "bottom", "title": "Position", "type": "string" + }, + "text_case": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Text Case" + }, + "words_per_line": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Words Per Line" } }, "required": [ @@ -1042,6 +1078,136 @@ 
"summary": "Generate Effects Config" } }, + "/api/fonts": { + "get": { + "description": "Return the catalog of available fonts (system + user-uploaded).", + "operationId": "list_fonts_api_fonts_get", + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + } + }, + "summary": "List Fonts" + } + }, + "/api/fonts/file/user/{name}": { + "get": { + "description": "Serve a user-uploaded font from assets/fonts/user/.", + "operationId": "serve_user_font_api_fonts_file_user__name__get", + "parameters": [ + { + "in": "path", + "name": "name", + "required": true, + "schema": { + "title": "Name", + "type": "string" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Serve User Font" + } + }, + "/api/fonts/file/{name}": { + "get": { + "description": "Serve a bundled font (in assets/fonts/, not under user/).", + "operationId": "serve_bundled_font_api_fonts_file__name__get", + "parameters": [ + { + "in": "path", + "name": "name", + "required": true, + "schema": { + "title": "Name", + "type": "string" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Serve Bundled Font" + } + }, + "/api/fonts/upload": { + "post": { + "description": "Save a user-uploaded font under assets/fonts/user/.", + "operationId": "upload_font_api_fonts_upload_post", + "requestBody": { + "content": { + "multipart/form-data": { + "schema": { + "$ref": 
"#/components/schemas/Body_upload_font_api_fonts_upload_post" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Upload Font" + } + }, "/api/hook": { "post": { "operationId": "add_hook_api_hook_post", diff --git a/docker-compose.yml b/docker-compose.yml index 057fe07b..9b616cc7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,10 +3,10 @@ services: build: ./backend container_name: openshorts-backend ports: - - "8000:8000" + - "3002:8000" volumes: - ./backend:/app - - ./assets:/app/assets:ro + - ./assets:/app/assets - /app/__pycache__ - ./output:/app/output - ./uploads:/app/uploads @@ -16,7 +16,7 @@ services: build: ./frontend container_name: openshorts-frontend ports: - - "5175:5173" + - "3001:5173" volumes: - ./frontend:/app - /app/node_modules @@ -30,7 +30,7 @@ services: dockerfile: renderer/service/Dockerfile container_name: openshorts-renderer ports: - - "3100:3100" + - "3003:3100" volumes: - ./output:/output environment: diff --git a/frontend/src/components/BrandKit.jsx b/frontend/src/components/BrandKit.jsx new file mode 100644 index 00000000..05d077d0 --- /dev/null +++ b/frontend/src/components/BrandKit.jsx @@ -0,0 +1,218 @@ +import React, { useEffect, useState } from 'react'; +import { Palette, Plus, X, RotateCcw, Smartphone, Monitor, TextSelect, CaseUpper, CaseLower, CaseSensitive } from 'lucide-react'; +import { DEFAULT_BRAND_KIT, loadBrandKit, saveBrandKit, resetBrandKit, ensureFontLoaded } from '../lib/brandKit'; +import FontPicker from './FontPicker'; +import BrandPreview from './BrandPreview'; +import PositionGrid from './PositionGrid'; + +const RATIO_META = { + '9:16': { label: 'Shorts', icon: Smartphone }, + '16:9': { label: 
'YouTube', icon: Monitor }, +}; + +export default function BrandKit() { + const [kit, setKit] = useState(() => loadBrandKit()); + const [ratio, setRatio] = useState('9:16'); + + useEffect(() => { + saveBrandKit(kit); + ensureFontLoaded(kit.font); + }, [kit]); + + const style = kit.styles[ratio]; + + const updateStyle = (patch) => setKit(k => ({ + ...k, + styles: { ...k.styles, [ratio]: { ...k.styles[ratio], ...patch } }, + })); + const updateFont = (f) => setKit(k => ({ ...k, font: f })); + const updateColor = (i, hex) => setKit(k => ({ ...k, colors: k.colors.map((c, idx) => idx === i ? { ...c, hex } : c) })); + const renameColor = (i, name) => setKit(k => ({ ...k, colors: k.colors.map((c, idx) => idx === i ? { ...c, name } : c) })); + const addColor = () => setKit(k => ({ ...k, colors: [...k.colors, { name: `Color ${k.colors.length + 1}`, hex: '#FF6B6B' }] })); + const removeColor = (i) => setKit(k => ({ ...k, colors: k.colors.filter((_, idx) => idx !== i) })); + + const handleReset = () => { + if (window.confirm('Reset brand kit to defaults?')) { + resetBrandKit(); + setKit(DEFAULT_BRAND_KIT); + } + }; + + return ( + <div className="bg-surface border border-white/5 rounded-2xl p-6 mt-6 animate-[fadeIn_0.5s_ease-out]"> + <div className="flex items-center justify-between mb-4"> + <div className="flex items-center gap-3"> + <div className="p-2 bg-fuchsia-500/20 rounded-lg text-fuchsia-400"> + <Palette size={20} /> + </div> + <div> + <h2 className="text-lg font-semibold">Brand Kit</h2> + <p className="text-xs text-zinc-500">Colors and font are shared; size, position, and word-wrap can differ per aspect ratio.</p> + </div> + </div> + <button + onClick={handleReset} + className="flex items-center gap-1.5 px-3 py-1.5 text-xs text-zinc-400 hover:text-white border border-white/10 hover:border-white/20 rounded-lg transition-colors" + > + <RotateCcw size={12} /> Reset + </button> + </div> + + <div className="grid md:grid-cols-2 gap-6"> + {/* LEFT: controls */} + <div 
className="space-y-6"> + {/* Brand colors (shared) */} + <div> + <label className="text-sm font-semibold text-zinc-200 mb-3 flex items-center justify-between"> + <span>Brand colors</span> + <span className="text-[10px] uppercase tracking-wider text-zinc-500">Shared</span> + </label> + <div className="space-y-2"> + {kit.colors.map((c, i) => ( + <div key={i} className="flex items-center gap-2"> + <label className="relative cursor-pointer shrink-0"> + <div className="w-10 h-10 rounded-lg border-2 border-white/10" style={{ backgroundColor: c.hex }} /> + <input type="color" value={c.hex} onChange={(e) => updateColor(i, e.target.value.toUpperCase())} className="absolute inset-0 opacity-0 cursor-pointer" /> + </label> + <input type="text" value={c.name} onChange={(e) => renameColor(i, e.target.value)} className="flex-1 px-3 py-2 bg-white/5 border border-white/10 rounded-lg text-sm focus:outline-none focus:border-primary" /> + <input type="text" value={c.hex} onChange={(e) => updateColor(i, e.target.value.toUpperCase())} className="w-24 px-3 py-2 bg-white/5 border border-white/10 rounded-lg text-sm font-mono focus:outline-none focus:border-primary" /> + {kit.colors.length > 1 && ( + <button onClick={() => removeColor(i)} className="p-2 text-zinc-500 hover:text-red-400 transition-colors"> + <X size={14} /> + </button> + )} + </div> + ))} + <button onClick={addColor} className="w-full flex items-center justify-center gap-1.5 py-2 text-xs text-zinc-400 hover:text-white border border-dashed border-white/10 hover:border-white/30 rounded-lg transition-colors"> + <Plus size={12} /> Add color + </button> + </div> + </div> + + {/* Font (shared) */} + <div> + <label className="text-sm font-semibold text-zinc-200 mb-3 flex items-center justify-between"> + <span>Font</span> + <span className="text-[10px] uppercase tracking-wider text-zinc-500">Shared</span> + </label> + <FontPicker value={kit.font} onChange={updateFont} /> + </div> + + {/* Ratio toggle */} + <div className="border-t 
border-white/5 pt-4"> + <div className="flex items-center justify-between mb-3"> + <label className="text-sm font-semibold text-zinc-200">Layout settings</label> + <div className="flex gap-1 p-1 bg-white/5 rounded-lg"> + {Object.entries(RATIO_META).map(([key, { label, icon: I }]) => ( + <button + key={key} + onClick={() => setRatio(key)} + className={`flex items-center gap-1.5 px-3 py-1 rounded text-xs font-medium transition-colors ${ratio === key ? 'bg-primary text-white' : 'text-zinc-400 hover:text-white'}`} + > + <I size={12} /> + {key} + </button> + ))} + </div> + </div> + + {/* Size + Stroke width (per-ratio) */} + <div className="grid grid-cols-2 gap-4 mb-4"> + <div> + <label className="text-sm text-zinc-300 mb-2 flex items-center justify-between"> + <span>Size</span> + <span className="text-zinc-500 text-xs font-mono">{style.size}px</span> + </label> + <input type="range" min="24" max="200" value={style.size} onChange={(e) => updateStyle({ size: parseInt(e.target.value, 10) })} className="w-full accent-fuchsia-500" /> + </div> + <div> + <label className="text-sm text-zinc-300 mb-2 flex items-center justify-between"> + <span>Stroke width</span> + <span className="text-zinc-500 text-xs font-mono">{style.strokeWidth}px</span> + </label> + <input type="range" min="0" max="20" value={style.strokeWidth} onChange={(e) => updateStyle({ strokeWidth: parseInt(e.target.value, 10) })} className="w-full accent-fuchsia-500" /> + </div> + </div> + + {/* Words per line (per-ratio) */} + <div className="mb-4"> + <label className="text-sm text-zinc-300 mb-2 flex items-center justify-between"> + <span className="flex items-center gap-2"><TextSelect size={14} /> Words per line</span> + <span className="text-zinc-500 text-xs font-mono">{style.wordsPerLine === 0 ? 'no wrap' : `${style.wordsPerLine} word${style.wordsPerLine === 1 ? 
'' : 's'}`}</span> + </label> + <input type="range" min="0" max="15" value={style.wordsPerLine} onChange={(e) => updateStyle({ wordsPerLine: parseInt(e.target.value, 10) })} className="w-full accent-fuchsia-500" /> + <p className="text-[11px] text-zinc-500 mt-1"> + {style.wordsPerLine === 0 + ? 'No wrapping — single line until it overflows.' + : style.wordsPerLine <= 3 + ? 'Hormozi style — short bursts, max impact.' + : style.wordsPerLine <= 7 + ? 'Balanced — readable on mobile.' + : 'Full sentences — good for wider 16:9 frames.'} + </p> + </div> + + {/* Text case (per-ratio) */} + <div className="mb-4"> + <label className="text-sm text-zinc-300 mb-2 block">Text case</label> + <div className="flex gap-1 p-1 bg-white/5 rounded-lg"> + {[ + { key: 'original', icon: CaseSensitive, label: 'As typed' }, + { key: 'upper', icon: CaseUpper, label: 'UPPERCASE' }, + { key: 'lower', icon: CaseLower, label: 'lowercase' }, + ].map(({ key, icon: I, label }) => ( + <button + key={key} + onClick={() => updateStyle({ textCase: key })} + className={`flex-1 flex items-center justify-center gap-1.5 py-2 rounded text-xs font-medium transition-colors ${style.textCase === key ? 'bg-primary text-white' : 'text-zinc-400 hover:text-white'}`} + > + <I size={14} /> + {label} + </button> + ))} + </div> + </div> + + {/* Text color (uses brand palette) */} + <div className="grid grid-cols-2 gap-4 mb-4"> + <div> + <label className="text-sm text-zinc-300 mb-2 block">Text color</label> + <div className="flex gap-2 flex-wrap"> + {kit.colors.map((c, i) => ( + <button key={i} onClick={() => updateStyle({ textColor: c.hex })} className={`w-8 h-8 rounded-lg border-2 transition-all ${style.textColor === c.hex ? 
'border-white scale-110' : 'border-white/10 hover:border-white/30'}`} style={{ backgroundColor: c.hex }} title={c.name} /> + ))} + <input type="color" value={style.textColor} onChange={(e) => updateStyle({ textColor: e.target.value.toUpperCase() })} className="w-8 h-8 rounded-lg cursor-pointer bg-transparent border border-white/10" title="Custom" /> + </div> + </div> + <div> + <label className="text-sm text-zinc-300 mb-2 block">Stroke color</label> + <div className="flex gap-2 flex-wrap"> + {kit.colors.map((c, i) => ( + <button key={i} onClick={() => updateStyle({ strokeColor: c.hex })} className={`w-8 h-8 rounded-lg border-2 transition-all ${style.strokeColor === c.hex ? 'border-white scale-110' : 'border-white/10 hover:border-white/30'}`} style={{ backgroundColor: c.hex }} title={c.name} /> + ))} + <input type="color" value={style.strokeColor} onChange={(e) => updateStyle({ strokeColor: e.target.value.toUpperCase() })} className="w-8 h-8 rounded-lg cursor-pointer bg-transparent border border-white/10" title="Custom" /> + </div> + </div> + </div> + + {/* 9-anchor position grid (per-ratio) */} + <PositionGrid value={style.position} onChange={(p) => updateStyle({ position: p })} /> + </div> + </div> + + {/* RIGHT: live preview */} + <div> + <BrandPreview + brandKit={kit} + activeRatio={ratio} + onRatioChange={setRatio} + onPreviewTextChange={(text) => setKit(k => ({ ...k, previewText: text }))} + /> + <div className="mt-3 p-3 rounded-lg bg-white/5 border border-white/10 text-xs text-zinc-400 leading-relaxed"> + Applied automatically to <span className="text-white font-medium">subtitles</span>, <span className="text-white font-medium">hook overlays</span>, and <span className="text-white font-medium">AI effect text</span>. Per-clip overrides remain possible. 
+ </div> + </div> + </div> + </div> + ); +} diff --git a/frontend/src/components/BrandPreview.jsx b/frontend/src/components/BrandPreview.jsx new file mode 100644 index 00000000..bcd6cb52 --- /dev/null +++ b/frontend/src/components/BrandPreview.jsx @@ -0,0 +1,211 @@ +import React, { useEffect, useMemo, useRef, useState } from 'react'; +import { Smartphone, Monitor, MessageSquare, RotateCcw, Pause, Play } from 'lucide-react'; +import { wrapByWords, applyTextCase, DEFAULT_PREVIEW_TEXT } from '../lib/brandKit'; + +// Live preview canvas that mirrors how subtitles actually flash on a video: +// ONE chunk on screen at a time. We split the user's text into chunks of N +// words (N = brand kit's wordsPerLine) and auto-cycle through them like real +// SRT playback. Dots beneath the canvas let the user jump to any chunk. + +const FRAME_CONFIG = { + '9:16': { w: 270, h: 480, scaleBase: 1920 }, + '16:9': { w: 480, h: 270, scaleBase: 1080 }, +}; + +const CYCLE_MS = 1500; + +function anchorToCoords(position, w, h) { + const [vert, horiz] = position.split('-'); + const padX = w * 0.06; + const padY = h * 0.08; + let x, textAlign; + if (horiz === 'left') { x = padX; textAlign = 'left'; } + else if (horiz === 'right') { x = w - padX; textAlign = 'right'; } + else { x = w / 2; textAlign = 'center'; } + let yBand; + if (vert === 'top') yBand = padY; + else if (vert === 'middle') yBand = h / 2; + else yBand = h - padY; + return { x, yBand, textAlign, vert }; +} + +export default function BrandPreview({ brandKit, activeRatio, onRatioChange, onPreviewTextChange }) { + const canvasRef = useRef(null); + const ratio = activeRatio || '9:16'; + const previewText = brandKit.previewText ?? DEFAULT_PREVIEW_TEXT; + const style = brandKit.styles[ratio]; + + // Split the user's text into chunks of N words — each chunk is one + // "subtitle moment" on screen, just like the real burn-in. 
+ const chunks = useMemo( + () => wrapByWords(previewText, style.wordsPerLine), + [previewText, style.wordsPerLine] + ); + + const [chunkIdx, setChunkIdx] = useState(0); + const [paused, setPaused] = useState(false); + + // Clamp index when chunks change (e.g., user dragged words-per-line) + useEffect(() => { + if (chunkIdx >= chunks.length) setChunkIdx(0); + }, [chunks.length, chunkIdx]); + + // Auto-cycle + useEffect(() => { + if (paused || chunks.length <= 1) return; + const id = setInterval(() => { + setChunkIdx(i => (i + 1) % chunks.length); + }, CYCLE_MS); + return () => clearInterval(id); + }, [paused, chunks.length]); + + const draw = () => { + const canvas = canvasRef.current; + if (!canvas) return; + const { w, h, scaleBase } = FRAME_CONFIG[ratio]; + canvas.width = w; + canvas.height = h; + const ctx = canvas.getContext('2d'); + + // Mock frame background + const grad = ctx.createLinearGradient(0, 0, 0, h); + grad.addColorStop(0, '#1a1a2e'); + grad.addColorStop(0.5, '#16213e'); + grad.addColorStop(1, '#0f3460'); + ctx.fillStyle = grad; + ctx.fillRect(0, 0, w, h); + ctx.fillStyle = 'rgba(255,255,255,0.08)'; + ctx.beginPath(); + ctx.arc(w / 2, h * 0.35, h * 0.18, 0, Math.PI * 2); + ctx.fill(); + + const { font } = brandKit; + const previewScale = h / scaleBase; + const fontSize = Math.max(10, Math.round(style.size * previewScale * 2.6)); + const strokeWidth = Math.max(0, Math.round(style.strokeWidth * previewScale * 2.6)); + + ctx.font = `bold ${fontSize}px "${font.family}", system-ui, sans-serif`; + ctx.lineJoin = 'round'; + + // The "current moment" — exactly one chunk on screen, with brand-kit case applied. + const text = applyTextCase(chunks[chunkIdx] ?? 
'', style.textCase); + + const { x, yBand, textAlign, vert } = anchorToCoords(style.position, w, h); + ctx.textAlign = textAlign; + ctx.textBaseline = 'middle'; + const lineHeight = fontSize * 1.15; + let y; + if (vert === 'top') y = yBand + lineHeight / 2; + else if (vert === 'middle') y = yBand; + else y = yBand - lineHeight / 2; + + if (strokeWidth > 0) { + ctx.strokeStyle = style.strokeColor; + ctx.lineWidth = strokeWidth; + ctx.strokeText(text, x, y); + } + ctx.fillStyle = style.textColor; + ctx.fillText(text, x, y); + }; + + useEffect(() => { + draw(); + if (document.fonts && document.fonts.ready) { + document.fonts.ready.then(draw); + } + }, [brandKit, ratio, chunkIdx, chunks]); + + const wordCount = previewText.trim().split(/\s+/).filter(Boolean).length; + + return ( + <div> + <div className="flex items-center justify-between mb-3"> + <div className="text-sm font-semibold text-zinc-300">Live preview</div> + <div className="flex gap-1 p-1 bg-white/5 rounded-lg"> + {Object.entries(FRAME_CONFIG).map(([key]) => { + const Icon = key === '9:16' ? Smartphone : Monitor; + return ( + <button + key={key} + type="button" + onClick={() => onRatioChange?.(key)} + className={`flex items-center gap-1.5 px-3 py-1 rounded text-xs font-medium transition-colors ${ratio === key ? 'bg-primary text-white' : 'text-zinc-400 hover:text-white'}`} + > + <Icon size={12} /> + {key} + </button> + ); + })} + </div> + </div> + + {/* Editable preview text */} + <div className="mb-3"> + <label className="flex items-center justify-between text-xs text-zinc-400 mb-1.5"> + <span className="flex items-center gap-1.5"> + <MessageSquare size={12} /> Try your own text + </span> + <span className="flex items-center gap-2"> + <span className="font-mono">{wordCount} {wordCount === 1 ? 
'word' : 'words'}</span> + {previewText !== DEFAULT_PREVIEW_TEXT && ( + <button + type="button" + onClick={() => onPreviewTextChange?.(DEFAULT_PREVIEW_TEXT)} + className="text-zinc-500 hover:text-white flex items-center gap-1" + > + <RotateCcw size={10} /> reset + </button> + )} + </span> + </label> + <textarea + value={previewText} + onChange={(e) => onPreviewTextChange?.(e.target.value)} + rows={2} + placeholder="Type a sample sentence to see how it wraps…" + className="w-full px-3 py-2 bg-white/5 border border-white/10 rounded-lg text-sm text-zinc-200 placeholder-zinc-600 focus:outline-none focus:border-primary resize-y" + /> + </div> + + {/* Canvas */} + <div className="flex items-center justify-center bg-black/50 border border-white/10 rounded-xl p-6"> + <canvas + ref={canvasRef} + className="rounded-md shadow-2xl" + style={{ maxWidth: '100%', height: 'auto' }} + /> + </div> + + {/* Chunk navigation */} + <div className="mt-3 flex items-center justify-between gap-3"> + <button + type="button" + onClick={() => setPaused(p => !p)} + className="flex items-center gap-1 px-2 py-1 text-xs text-zinc-400 hover:text-white border border-white/10 hover:border-white/20 rounded transition-colors" + > + {paused ? <><Play size={10} /> Play</> : <><Pause size={10} /> Pause</>} + </button> + + <div className="flex-1 flex items-center justify-center gap-1.5 flex-wrap"> + {chunks.map((chunk, i) => ( + <button + key={i} + type="button" + onClick={() => { setChunkIdx(i); setPaused(true); }} + title={chunk} + className={`h-1.5 rounded-full transition-all ${i === chunkIdx ? 'bg-primary w-8' : 'bg-white/15 hover:bg-white/30 w-2'}`} + /> + ))} + </div> + + <div className="text-xs text-zinc-500 font-mono shrink-0"> + {chunkIdx + 1}/{chunks.length} + </div> + </div> + + <p className="text-[11px] text-zinc-500 mt-2 text-center leading-relaxed"> + Each <span className="text-zinc-300">chunk = {style.wordsPerLine || '∞'} word{style.wordsPerLine === 1 ? 
'' : 's'}</span> on screen at once — exactly what your subtitle burn-in produces. Cycling automatically every {(CYCLE_MS / 1000).toFixed(1)}s. + </p> + </div> + ); +} diff --git a/frontend/src/components/FontPicker.jsx b/frontend/src/components/FontPicker.jsx new file mode 100644 index 00000000..f518433f --- /dev/null +++ b/frontend/src/components/FontPicker.jsx @@ -0,0 +1,142 @@ +import React, { useEffect, useRef, useState } from 'react'; +import { Check, Type, Upload, X } from 'lucide-react'; +import { ensureFontLoaded } from '../lib/brandKit'; + +export default function FontPicker({ value, onChange }) { + const [fonts, setFonts] = useState([]); + const [open, setOpen] = useState(false); + const [uploading, setUploading] = useState(false); + const [uploadError, setUploadError] = useState(null); + const inputRef = useRef(null); + const listRef = useRef(null); + + const refresh = async () => { + try { + const r = await fetch('/api/fonts'); + const data = await r.json(); + setFonts(data.fonts || []); + // Pre-register bundled + user fonts so the list previews render correctly. 
+ (data.fonts || []).forEach(ensureFontLoaded); + } catch (e) { + console.warn('Failed to load fonts:', e); + } + }; + + useEffect(() => { refresh(); }, []); + + // Close on outside click + useEffect(() => { + const handler = (e) => { + if (listRef.current && !listRef.current.contains(e.target)) setOpen(false); + }; + if (open) document.addEventListener('mousedown', handler); + return () => document.removeEventListener('mousedown', handler); + }, [open]); + + const handleUpload = async (e) => { + const file = e.target.files?.[0]; + if (!file) return; + setUploading(true); + setUploadError(null); + try { + const form = new FormData(); + form.append('file', file); + const r = await fetch('/api/fonts/upload', { method: 'POST', body: form }); + if (!r.ok) { + const err = await r.json().catch(() => ({})); + throw new Error(err.detail || `Upload failed (${r.status})`); + } + const uploaded = await r.json(); + await refresh(); + onChange(uploaded); + } catch (err) { + setUploadError(err.message); + } finally { + setUploading(false); + e.target.value = ''; + } + }; + + const selectedName = value?.family || 'Inter'; + + return ( + <div ref={listRef} className="relative"> + <button + type="button" + onClick={() => setOpen(o => !o)} + className="w-full flex items-center justify-between px-4 py-3 bg-white/5 hover:bg-white/10 border border-white/10 rounded-xl text-left transition-colors" + > + <span className="flex items-center gap-3 min-w-0"> + <Type size={16} className="text-zinc-400 shrink-0" /> + <span + className="truncate text-base" + style={{ fontFamily: `"${selectedName}", system-ui` }} + > + {selectedName} + </span> + {value?.source && value.source !== 'system' && ( + <span className="text-[10px] uppercase tracking-wide text-zinc-500 shrink-0"> + {value.source} + </span> + )} + </span> + <svg width="12" height="12" viewBox="0 0 12 12" className={`text-zinc-400 transition-transform ${open ? 
'rotate-180' : ''}`}> + <path d="M2 4l4 4 4-4" stroke="currentColor" strokeWidth="1.5" fill="none" /> + </svg> + </button> + + {open && ( + <div className="absolute z-20 mt-2 w-full bg-surface border border-white/10 rounded-xl shadow-2xl overflow-hidden"> + <div className="max-h-72 overflow-y-auto custom-scrollbar"> + {fonts.map((f) => ( + <button + type="button" + key={`${f.source}-${f.name}`} + onClick={() => { onChange(f); setOpen(false); }} + className={`w-full flex items-center gap-3 px-4 py-3 hover:bg-white/5 text-left transition-colors border-b border-white/5 last:border-b-0 ${value?.family === f.name ? 'bg-primary/10' : ''}`} + > + <span + className="flex-1 truncate text-base" + style={{ fontFamily: `"${f.name}", system-ui` }} + > + {f.name} + </span> + <span className="text-[10px] uppercase tracking-wide text-zinc-500">{f.source}</span> + {value?.family === f.name && <Check size={14} className="text-primary" />} + </button> + ))} + </div> + + <label className="block border-t border-white/10 px-4 py-3 hover:bg-white/5 cursor-pointer text-sm text-zinc-300"> + <span className="flex items-center gap-2"> + {uploading ? 
( + <> + <div className="w-4 h-4 border-2 border-primary border-t-transparent rounded-full animate-spin" /> + Uploading… + </> + ) : ( + <> + <Upload size={14} /> + Upload .ttf / .otf / .woff + </> + )} + </span> + <input + ref={inputRef} + type="file" + accept=".ttf,.otf,.woff,.woff2,font/ttf,font/otf,font/woff,font/woff2" + onChange={handleUpload} + className="hidden" + disabled={uploading} + /> + {uploadError && ( + <span className="text-xs text-red-400 mt-1 flex items-center gap-1"> + <X size={12} /> {uploadError} + </span> + )} + </label> + </div> + )} + </div> + ); +} diff --git a/frontend/src/components/HookModal.jsx b/frontend/src/components/HookModal.jsx index db50e25e..9da38117 100644 --- a/frontend/src/components/HookModal.jsx +++ b/frontend/src/components/HookModal.jsx @@ -1,6 +1,19 @@ import React, { useState } from 'react'; import { X, Sparkles, Loader2, Maximize, MoveVertical, Zap } from 'lucide-react'; import RemotionPreview from './RemotionPreview'; +import { useBrandKit } from '../lib/brandKit'; + +// Map 9-anchor brand position to the modal's simpler 3-position (top/middle/bottom). 
+function anchorToVerticalPosition(anchor) { + if (anchor?.startsWith('top')) return 'top'; + if (anchor?.startsWith('middle')) return 'middle'; + return 'bottom'; +} +function sizeToBucket(px) { + if (px >= 96) return 'L'; + if (px >= 60) return 'M'; + return 'S'; +} const ENTRANCE_OPTIONS = [ { value: 'spring', label: 'Bounce' }, @@ -10,9 +23,11 @@ const ENTRANCE_OPTIONS = [ ]; export default function HookModal({ isOpen, onClose, onGenerate, isProcessing, videoUrl, initialText, durationInSeconds, existingSubtitles }) { + const brandKit = useBrandKit(); + const bkStyle = brandKit.styles['9:16']; const [text, setText] = useState(initialText || 'POV: You are using the viral hook feature'); - const [position, setPosition] = useState('top'); - const [size, setSize] = useState('M'); + const [position, setPosition] = useState(anchorToVerticalPosition(bkStyle.position)); + const [size, setSize] = useState(sizeToBucket(bkStyle.size)); const [entranceAnimation, setEntranceAnimation] = useState('spring'); const [displayDuration, setDisplayDuration] = useState(5); diff --git a/frontend/src/components/PositionGrid.jsx b/frontend/src/components/PositionGrid.jsx new file mode 100644 index 00000000..e97022df --- /dev/null +++ b/frontend/src/components/PositionGrid.jsx @@ -0,0 +1,55 @@ +import React from 'react'; +import { ALL_POSITIONS } from '../lib/brandKit'; + +// 3x3 anchor selector. Each cell represents the text's anchor point on the +// video frame. Same UI pattern Figma uses for object alignment. 
+ +const LABELS = { + 'top-left': 'Top L', + 'top-center': 'Top', + 'top-right': 'Top R', + 'middle-left': 'Left', + 'middle-center': 'Center', + 'middle-right': 'Right', + 'bottom-left': 'Bot L', + 'bottom-center': 'Bottom', + 'bottom-right': 'Bot R', +}; + +export default function PositionGrid({ value, onChange, label = 'Text position' }) { + return ( + <div> + {label && ( + <label className="text-sm font-semibold text-zinc-200 mb-2 block"> + {label} + <span className="ml-2 text-zinc-500 font-normal text-xs"> + {LABELS[value] || value} + </span> + </label> + )} + <div className="inline-block p-2 bg-white/5 border border-white/10 rounded-xl"> + <div className="grid grid-cols-3 gap-1 w-32 aspect-[9/16]"> + {ALL_POSITIONS.map((pos) => { + const isActive = value === pos; + return ( + <button + key={pos} + type="button" + onClick={() => onChange(pos)} + title={LABELS[pos]} + aria-label={LABELS[pos]} + className={`relative rounded transition-all flex items-center justify-center ${ + isActive + ? 'bg-primary text-white shadow-md scale-105' + : 'bg-white/5 hover:bg-white/15 text-zinc-500 hover:text-zinc-200' + }`} + > + <span className={`w-1.5 h-1.5 rounded-full ${isActive ? 
'bg-white' : 'bg-current'}`} /> + </button> + ); + })} + </div> + </div> + </div> + ); +} diff --git a/frontend/src/components/ResultCard.jsx b/frontend/src/components/ResultCard.jsx index a1ac9c45..24bc0427 100644 --- a/frontend/src/components/ResultCard.jsx +++ b/frontend/src/components/ResultCard.jsx @@ -179,6 +179,8 @@ export default function ResultCard({ clip, index, jobId, uploadPostKey, uploadUs border_width: options.borderWidth, bg_color: options.bgColor, bg_opacity: options.bgOpacity, + words_per_line: options.wordsPerLine, + text_case: options.textCase, input_filename: currentVideoUrl.split('/').pop() }) }); diff --git a/frontend/src/components/SubtitleModal.jsx b/frontend/src/components/SubtitleModal.jsx index bedc2666..6f4e3f54 100644 --- a/frontend/src/components/SubtitleModal.jsx +++ b/frontend/src/components/SubtitleModal.jsx @@ -2,6 +2,15 @@ import React, { useState, useEffect } from 'react'; import { X, Type, Loader2 } from 'lucide-react'; import { getApiUrl } from '../config'; import RemotionPreview from './RemotionPreview'; +import { useBrandKit } from '../lib/brandKit'; + +// Map the 9-anchor brand position to the modal's simpler 3-position picker. +// Subtitles in this pipeline are always vertical (9:16) output. 
+function anchorToVerticalPosition(anchor) { + if (anchor?.startsWith('top')) return 'top'; + if (anchor?.startsWith('middle')) return 'middle'; + return 'bottom'; +} const FONT_OPTIONS = [ { value: 'Verdana', label: 'Verdana' }, @@ -29,13 +38,30 @@ const ANIMATION_OPTIONS = [ ]; export default function SubtitleModal({ isOpen, onClose, onGenerate, isProcessing, videoUrl, jobId, clipIndex, existingHook }) { - const [position, setPosition] = useState('bottom'); - const [fontSize, setFontSize] = useState(24); - const [fontName, setFontName] = useState('Verdana'); - const [fontColor, setFontColor] = useState('#FFFFFF'); - const [highlightColor, setHighlightColor] = useState('#FFDD00'); - const [borderColor, setBorderColor] = useState('#000000'); - const [borderWidth, setBorderWidth] = useState(2); + // Pulls live brand-kit values (re-renders on any settings change). + const brandKit = useBrandKit(); + const bkStyle = brandKit.styles['9:16']; + + const [position, setPosition] = useState(anchorToVerticalPosition(bkStyle.position)); + // Brand size is in 1080p px; the modal's `fontSize` is for a small preview (~× 2.2 ratio when burned in). + const [fontSize, setFontSize] = useState(Math.round(bkStyle.size / 2.5)); + const [fontName, setFontName] = useState(brandKit.font.family); + const [fontColor, setFontColor] = useState(bkStyle.textColor); + const [highlightColor, setHighlightColor] = useState(brandKit.colors[1]?.hex || '#FFDD00'); + const [borderColor, setBorderColor] = useState(bkStyle.strokeColor); + const [borderWidth, setBorderWidth] = useState(Math.max(1, Math.round(bkStyle.strokeWidth / 2.5))); + // Words-per-line flows to the backend SRT generator as `max_words`. + const [wordsPerLine] = useState(bkStyle.wordsPerLine); + // Text case (original | upper | lower) — applied to the SRT text by the backend. + const [textCase] = useState(bkStyle.textCase); + + // Re-pull brand defaults if the kit changes while modal is open and user hasn't touched values. 
+ // (Conservative: only resets if matching previous defaults; lets manual edits stick.) + useEffect(() => { + if (!isOpen) return; + setPosition(prev => prev === anchorToVerticalPosition(bkStyle.position) ? prev : prev); + // Note: full bidirectional sync would require tracking "user dirty" state; left to a future pass. + }, [isOpen, brandKit]); const [bgColor, setBgColor] = useState('#000000'); const [bgOpacity, setBgOpacity] = useState(0.0); const [animation, setAnimation] = useState('pop'); @@ -357,6 +383,7 @@ export default function SubtitleModal({ isOpen, onClose, onGenerate, isProcessin <button onClick={() => onGenerate({ position, fontSize, fontName, fontColor, borderColor, borderWidth, bgColor, bgOpacity, + wordsPerLine, textCase, // Remotion data remotion: useRemotionPreview ? subtitleConfig : null, })} diff --git a/frontend/src/lib/brandKit.js b/frontend/src/lib/brandKit.js new file mode 100644 index 00000000..00efc506 --- /dev/null +++ b/frontend/src/lib/brandKit.js @@ -0,0 +1,191 @@ +// Brand Kit persistence + defaults. +// +// The brand kit defines how text overlays look across the app. Layout-level +// settings (size, stroke width, position, words-per-line) are stored +// PER ASPECT RATIO — Shorts (9:16) and YouTube (16:9) usually want very +// different sizing and wrapping. Colors and font are SHARED across ratios +// because they're brand identity, not layout. +// +// Shape: +// { +// colors: [{ name, hex }, ...] // 1+ named brand colors +// font: { family, source, url? 
} // url present for bundled/user fonts +// styles: { +// '9:16': { size, strokeWidth, textColor, strokeColor, position, wordsPerLine }, +// '16:9': { size, strokeWidth, textColor, strokeColor, position, wordsPerLine }, +// } +// } +// +// Position values use a 3x3 anchor grid: +// top-left | top-center | top-right +// middle-left | middle-center | middle-right +// bottom-left | bottom-center | bottom-right +// +// `size` and `strokeWidth` are in 1080p-equivalent pixels (canvas units). + +import { useEffect, useState } from 'react'; + +const STORAGE_KEY = 'openshorts.brandKit.v2'; +const LEGACY_KEY = 'openshorts.brandKit.v1'; + +export const ALL_POSITIONS = [ + 'top-left', 'top-center', 'top-right', + 'middle-left', 'middle-center', 'middle-right', + 'bottom-left', 'bottom-center', 'bottom-right', +]; + +export const DEFAULT_PREVIEW_TEXT = 'Stop scrolling and watch this insane clip right now before you regret it'; + +export const DEFAULT_BRAND_KIT = { + colors: [ + { name: 'Primary', hex: '#FFFFFF' }, + { name: 'Accent', hex: '#FFD60A' }, + { name: 'Stroke', hex: '#000000' }, + ], + font: { family: 'Inter', source: 'system', url: null }, + previewText: DEFAULT_PREVIEW_TEXT, + styles: { + '9:16': { + size: 72, + strokeWidth: 6, + textColor: '#FFFFFF', + strokeColor: '#000000', + position: 'bottom-center', + wordsPerLine: 2, + textCase: 'upper', // Hormozi default for shorts + }, + '16:9': { + size: 48, + strokeWidth: 4, + textColor: '#FFFFFF', + strokeColor: '#000000', + position: 'bottom-center', + wordsPerLine: 10, + textCase: 'original', + }, + }, +}; + +// Apply a brand-kit text-case setting to a string. +export function applyTextCase(text, textCase) { + if (textCase === 'upper') return String(text).toUpperCase(); + if (textCase === 'lower') return String(text).toLowerCase(); + return String(text); +} + +function migrateLegacy(parsed) { + // v1 had a single { style: ... } block; convert to per-ratio styles. 
+ if (parsed?.style && !parsed.styles) { + const legacyPos = parsed.style.position; // 'top' | 'middle' | 'bottom' + const legacyAlign = parsed.style.align || 'center'; // 'left' | 'center' | 'right' + const mapped = `${legacyPos === 'middle' ? 'middle' : legacyPos}-${legacyAlign}`; + const safePosition = ALL_POSITIONS.includes(mapped) ? mapped : 'bottom-center'; + const base = { + size: parsed.style.size, + strokeWidth: parsed.style.strokeWidth, + textColor: parsed.style.textColor, + strokeColor: parsed.style.strokeColor, + position: safePosition, + }; + return { + colors: parsed.colors || DEFAULT_BRAND_KIT.colors, + font: parsed.font || DEFAULT_BRAND_KIT.font, + styles: { + '9:16': { ...DEFAULT_BRAND_KIT.styles['9:16'], ...base, wordsPerLine: 2 }, + '16:9': { ...DEFAULT_BRAND_KIT.styles['16:9'], ...base, wordsPerLine: 10, size: Math.round(base.size * 0.66) }, + }, + }; + } + return parsed; +} + +export function loadBrandKit() { + try { + let raw = localStorage.getItem(STORAGE_KEY); + if (!raw) { + // Look for legacy v1 and migrate. + const legacy = localStorage.getItem(LEGACY_KEY); + if (legacy) { + const migrated = migrateLegacy(JSON.parse(legacy)); + if (migrated?.styles) { + localStorage.setItem(STORAGE_KEY, JSON.stringify(migrated)); + localStorage.removeItem(LEGACY_KEY); + return migrated; + } + } + return DEFAULT_BRAND_KIT; + } + const parsed = JSON.parse(raw); + return { + colors: parsed.colors?.length ? parsed.colors : DEFAULT_BRAND_KIT.colors, + font: { ...DEFAULT_BRAND_KIT.font, ...(parsed.font || {}) }, + previewText: parsed.previewText ?? 
DEFAULT_PREVIEW_TEXT, + styles: { + '9:16': { ...DEFAULT_BRAND_KIT.styles['9:16'], ...(parsed.styles?.['9:16'] || {}) }, + '16:9': { ...DEFAULT_BRAND_KIT.styles['16:9'], ...(parsed.styles?.['16:9'] || {}) }, + }, + }; + } catch { + return DEFAULT_BRAND_KIT; + } +} + +export function saveBrandKit(kit) { + try { + localStorage.setItem(STORAGE_KEY, JSON.stringify(kit)); + window.dispatchEvent(new CustomEvent('brandKit:changed', { detail: kit })); + } catch (e) { + console.error('Failed to save brand kit:', e); + } +} + +export function resetBrandKit() { + localStorage.removeItem(STORAGE_KEY); + localStorage.removeItem(LEGACY_KEY); + window.dispatchEvent(new CustomEvent('brandKit:changed', { detail: DEFAULT_BRAND_KIT })); +} + +// React hook: returns the current brand kit and re-renders any time it's +// changed (in this tab or another). Use this in modals so they always see +// the latest settings, not a stale snapshot. +export function useBrandKit() { + const [kit, setKit] = useState(() => loadBrandKit()); + useEffect(() => { + const onChange = (e) => setKit(e.detail || loadBrandKit()); + const onStorage = (e) => { + if (e.key === STORAGE_KEY) setKit(loadBrandKit()); + }; + window.addEventListener('brandKit:changed', onChange); + window.addEventListener('storage', onStorage); + return () => { + window.removeEventListener('brandKit:changed', onChange); + window.removeEventListener('storage', onStorage); + }; + }, []); + return kit; +} + +// Helper: wrap a string into lines of N words. +export function wrapByWords(text, wordsPerLine) { + const words = String(text).split(/\s+/).filter(Boolean); + if (wordsPerLine <= 0) return [words.join(' ')]; + const lines = []; + for (let i = 0; i < words.length; i += wordsPerLine) { + lines.push(words.slice(i, i + wordsPerLine).join(' ')); + } + return lines.length ? lines : ['']; +} + +// Register an @font-face for a bundled/user font so the browser can render it. 
+const _registeredFonts = new Set(); +export function ensureFontLoaded(font) { + if (!font?.url || font.source === 'system') return; + if (_registeredFonts.has(font.family)) return; + const fontFace = new FontFace(font.family, `url(${font.url})`); + fontFace.load().then((loaded) => { + document.fonts.add(loaded); + _registeredFonts.add(font.family); + }).catch((err) => { + console.warn(`Failed to load font ${font.family}:`, err); + }); +} From 667a88e1f677ea96247e696ea0ed980e39a02195 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Tue, 19 May 2026 21:22:54 -0400 Subject: [PATCH 21/43] =?UTF-8?q?feat(ui):=20phase=201=20=E2=80=94=20shell?= =?UTF-8?q?=20+=20theme=20+=20routing=20skeleton?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the App.jsx tab switcher with react-router-dom (HashRouter) and a new platform shell: - 210px fixed Sidebar with 5 items (Dashboard, Short-form, Long-form, Clip Generator, Settings). - 50px persistent Header (page title + notification bell stub). - New theme tokens in tailwind.config.js (bg #0c0c0c, sidebar #111, surface #141414, indigo accent #5b5ef4, platform colors). Extract cross-page state out of App.jsx: - state/keysStore.js — Gemini/Upload-Post/ElevenLabs/fal keys + profile. - state/jobStore.js — jobId/status/results/processingMedia/session. - hooks/useJobPolling.js — /api/status/{id} polling loop. - lib/crypto.js — XOR+Base64 helpers extracted from App.jsx. Pages: - /clip-generator carries the existing process flow (uses extracted stores). - /settings keeps existing config UI (Gemini, Brand Kit, Upload-Post, ElevenLabs, fal.ai) until Phase 2 rebuilds with VS Code layout. - /dashboard, /short-form, /long-form are stubs until phases 3-4. Legacy code preserved at /legacy/saasshorts, /legacy/thumbnails, /legacy/ugc, /legacy/ai-agent — hidden from sidebar, reachable by URL. 
main.jsx wraps App in HashRouter; resolveView treats hash starting with '#/' as in-app so deep links survive reloads. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- frontend/package-lock.json | 64 + frontend/package.json | 1 + frontend/src/App.jsx | 1138 +----------------- frontend/src/hooks/useJobPolling.js | 54 + frontend/src/layouts/AppShell.jsx | 22 + frontend/src/layouts/Header.jsx | 52 + frontend/src/layouts/Sidebar.jsx | 42 + frontend/src/lib/crypto.js | 35 + frontend/src/main.jsx | 16 +- frontend/src/pages/ClipGenerator.jsx | 294 +++++ frontend/src/pages/Dashboard.jsx | 14 + frontend/src/pages/Legacy/AIAgent.jsx | 94 ++ frontend/src/pages/Legacy/SaaSShorts.jsx | 15 + frontend/src/pages/Legacy/Thumbnails.jsx | 13 + frontend/src/pages/Legacy/UGCGalleryPage.jsx | 5 + frontend/src/pages/LongForm.jsx | 15 + frontend/src/pages/PageStub.jsx | 21 + frontend/src/pages/Settings.jsx | 162 +++ frontend/src/pages/ShortForm.jsx | 16 + frontend/src/state/jobStore.js | 110 ++ frontend/src/state/keysStore.js | 78 ++ frontend/tailwind.config.js | 18 +- 22 files changed, 1163 insertions(+), 1116 deletions(-) create mode 100644 frontend/src/hooks/useJobPolling.js create mode 100644 frontend/src/layouts/AppShell.jsx create mode 100644 frontend/src/layouts/Header.jsx create mode 100644 frontend/src/layouts/Sidebar.jsx create mode 100644 frontend/src/lib/crypto.js create mode 100644 frontend/src/pages/ClipGenerator.jsx create mode 100644 frontend/src/pages/Dashboard.jsx create mode 100644 frontend/src/pages/Legacy/AIAgent.jsx create mode 100644 frontend/src/pages/Legacy/SaaSShorts.jsx create mode 100644 frontend/src/pages/Legacy/Thumbnails.jsx create mode 100644 frontend/src/pages/Legacy/UGCGalleryPage.jsx create mode 100644 frontend/src/pages/LongForm.jsx create mode 100644 frontend/src/pages/PageStub.jsx create mode 100644 frontend/src/pages/Settings.jsx create mode 100644 frontend/src/pages/ShortForm.jsx create mode 100644 
frontend/src/state/jobStore.js create mode 100644 frontend/src/state/keysStore.js diff --git a/frontend/package-lock.json b/frontend/package-lock.json index a25c4436..e7917bfa 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -15,6 +15,7 @@ "lucide-react": "^0.344.0", "react": "^18.2.0", "react-dom": "^18.2.0", + "react-router-dom": "^6.30.3", "remotion": "^4.0.447", "zod": "^4.3.6" }, @@ -872,6 +873,15 @@ "node": ">= 8" } }, + "node_modules/@remix-run/router": { + "version": "1.23.2", + "resolved": "https://registry.npmjs.org/@remix-run/router/-/router-1.23.2.tgz", + "integrity": "sha512-Ic6m2U/rMjTkhERIa/0ZtXJP17QUi2CbWE7cqx4J58M8aA3QTfW+2UlQ4psvTX9IO1RfNVhK3pcpdjej7L+t2w==", + "license": "MIT", + "engines": { + "node": ">=14.0.0" + } + }, "node_modules/@remotion/licensing": { "version": "4.0.447", "resolved": "https://registry.npmjs.org/@remotion/licensing/-/licensing-4.0.447.tgz", @@ -3958,6 +3968,38 @@ "node": ">=0.10.0" } }, + "node_modules/react-router": { + "version": "6.30.3", + "resolved": "https://registry.npmjs.org/react-router/-/react-router-6.30.3.tgz", + "integrity": "sha512-XRnlbKMTmktBkjCLE8/XcZFlnHvr2Ltdr1eJX4idL55/9BbORzyZEaIkBFDhFGCEWBBItsVrDxwx3gnisMitdw==", + "license": "MIT", + "dependencies": { + "@remix-run/router": "1.23.2" + }, + "engines": { + "node": ">=14.0.0" + }, + "peerDependencies": { + "react": ">=16.8" + } + }, + "node_modules/react-router-dom": { + "version": "6.30.3", + "resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-6.30.3.tgz", + "integrity": "sha512-pxPcv1AczD4vso7G4Z3TKcvlxK7g7TNt3/FNGMhfqyntocvYKj+GCatfigGDjbLozC4baguJ0ReCigoDJXb0ag==", + "license": "MIT", + "dependencies": { + "@remix-run/router": "1.23.2", + "react-router": "6.30.3" + }, + "engines": { + "node": ">=14.0.0" + }, + "peerDependencies": { + "react": ">=16.8", + "react-dom": ">=16.8" + } + }, "node_modules/read-cache": { "version": "1.0.0", "resolved": 
"https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz", @@ -5519,6 +5561,11 @@ "fastq": "^1.6.0" } }, + "@remix-run/router": { + "version": "1.23.2", + "resolved": "https://registry.npmjs.org/@remix-run/router/-/router-1.23.2.tgz", + "integrity": "sha512-Ic6m2U/rMjTkhERIa/0ZtXJP17QUi2CbWE7cqx4J58M8aA3QTfW+2UlQ4psvTX9IO1RfNVhK3pcpdjej7L+t2w==" + }, "@remotion/licensing": { "version": "4.0.447", "resolved": "https://registry.npmjs.org/@remotion/licensing/-/licensing-4.0.447.tgz", @@ -7653,6 +7700,23 @@ "integrity": "sha512-z6F7K9bV85EfseRCp2bzrpyQ0Gkw1uLoCel9XBVWPg/TjRj94SkJzUTGfOa4bs7iJvBWtQG0Wq7wnI0syw3EBQ==", "dev": true }, + "react-router": { + "version": "6.30.3", + "resolved": "https://registry.npmjs.org/react-router/-/react-router-6.30.3.tgz", + "integrity": "sha512-XRnlbKMTmktBkjCLE8/XcZFlnHvr2Ltdr1eJX4idL55/9BbORzyZEaIkBFDhFGCEWBBItsVrDxwx3gnisMitdw==", + "requires": { + "@remix-run/router": "1.23.2" + } + }, + "react-router-dom": { + "version": "6.30.3", + "resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-6.30.3.tgz", + "integrity": "sha512-pxPcv1AczD4vso7G4Z3TKcvlxK7g7TNt3/FNGMhfqyntocvYKj+GCatfigGDjbLozC4baguJ0ReCigoDJXb0ag==", + "requires": { + "@remix-run/router": "1.23.2", + "react-router": "6.30.3" + } + }, "read-cache": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz", diff --git a/frontend/package.json b/frontend/package.json index 0748bc8b..f08cee3c 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -17,6 +17,7 @@ "lucide-react": "^0.344.0", "react": "^18.2.0", "react-dom": "^18.2.0", + "react-router-dom": "^6.30.3", "remotion": "^4.0.447", "zod": "^4.3.6" }, diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index fb0e9128..89cdfab4 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -1,1113 +1,31 @@ -import React, { useState, useEffect } from 'react'; -import { Upload, FileVideo, Sparkles, Youtube, Instagram, Share2, LogOut, 
ChevronDown, Check, Activity, LayoutDashboard, Settings, PlusCircle, History, Menu, X, Terminal, Shield, LayoutGrid, Image, Globe, RotateCcw, Calendar, AlertTriangle, KeyRound, Bot, Users, Smartphone, ExternalLink, Copy, CheckCircle2 } from 'lucide-react'; -import KeyInput from './components/KeyInput'; -import MediaInput from './components/MediaInput'; -import ResultCard from './components/ResultCard'; -import ProcessingAnimation from './components/ProcessingAnimation'; -// import Gallery from './components/Gallery'; -import ThumbnailStudio from './components/ThumbnailStudio'; -import SaaShortsTab from './components/SaaShortsTab'; -import UGCGallery from './components/UGCGallery'; -import ScheduleWeekModal from './components/ScheduleWeekModal'; -import { getApiUrl } from './config'; - -// Enhanced "Encryption" using XOR + Base64 with a Salt -// This is better than plain Base64 but still client-side. -const SECRET_KEY = import.meta.env.VITE_ENCRYPTION_KEY || "OpenShorts-Static-Salt-Change-Me"; -const ENCRYPTION_PREFIX = "ENC:"; - -const encrypt = (text) => { - if (!text) return ''; - try { - const xor = text.split('').map((c, i) => - String.fromCharCode(c.charCodeAt(0) ^ SECRET_KEY.charCodeAt(i % SECRET_KEY.length)) - ).join(''); - return ENCRYPTION_PREFIX + btoa(xor); - } catch (e) { - console.error("Encryption failed", e); - return text; - } -}; - -const decrypt = (text) => { - if (!text) return ''; - if (text.startsWith(ENCRYPTION_PREFIX)) { - try { - const raw = text.slice(ENCRYPTION_PREFIX.length); - // Check if it's plain base64 or our custom XOR (simple try) - const xor = atob(raw); - const result = xor.split('').map((c, i) => - String.fromCharCode(c.charCodeAt(0) ^ SECRET_KEY.charCodeAt(i % SECRET_KEY.length)) - ).join(''); - return result; - } catch (e) { - // Fallback if decryption fails (might be old plain text) - return ''; - } - } - // Backward compatibility: If no prefix, assume old plain text (or return empty if you want to force re-login) - // For 
migration: Return text as is, so it populates the field, and next save will encrypt it. - return text; -}; - -// Simple TikTok icon sine Lucide might not have it or it varies -const TikTokIcon = ({ size = 16, className = "" }) => ( - <svg width={size} height={size} viewBox="0 0 24 24" fill="currentColor" className={className}> - <path d="M19.589 6.686a4.793 4.793 0 0 1-3.77-4.245V2h-3.445v13.672a2.896 2.896 0 0 1-5.201 1.743l-.002-.001.002.001a2.895 2.895 0 0 1 3.183-4.51v-3.5a6.329 6.329 0 0 0-5.394 10.692 6.33 6.33 0 0 0 10.857-4.424V8.687a8.182 8.182 0 0 0 4.773 1.526V6.79a4.831 4.831 0 0 1-1.003-.104z" /> - </svg> -); - -const UserProfileSelector = ({ profiles, selectedUserId, onSelect }) => { - const [isOpen, setIsOpen] = useState(false); - - if (!profiles || profiles.length === 0) return null; - - const selectedProfile = profiles.find(p => p.username === selectedUserId) || profiles[0]; - - return ( - <div className="relative z-50"> - <button - onClick={() => setIsOpen(!isOpen)} - className="flex items-center justify-between bg-surface border border-white/10 rounded-lg px-3 py-2 text-sm text-zinc-300 hover:bg-white/5 transition-colors min-w-[180px]" - > - <span className="flex items-center gap-2"> - <div className="w-5 h-5 rounded-full bg-gradient-to-br from-primary to-purple-600 flex items-center justify-center text-[10px] font-bold text-white"> - {selectedProfile?.username?.substring(0, 1).toUpperCase() || "U"} - </div> - <span className="font-medium text-white truncate max-w-[100px]">{selectedProfile?.username || "Select User"}</span> - </span> - <ChevronDown size={14} className={`text-zinc-500 transition-transform ${isOpen ? 
'rotate-180' : ''}`} /> - </button> - - {isOpen && ( - <div className="absolute top-full mt-2 right-0 w-64 bg-[#1a1a1a] border border-white/10 rounded-xl shadow-2xl overflow-hidden"> - <div className="max-h-60 overflow-y-auto custom-scrollbar"> - {profiles.map((profile) => ( - <button - key={profile.username} - onClick={() => { - onSelect(profile.username); - setIsOpen(false); - }} - className="w-full flex items-center justify-between px-4 py-3 hover:bg-white/5 transition-colors text-left group border-b border-white/5 last:border-0" - > - <div className="flex items-center gap-3"> - <div className="w-8 h-8 rounded-full bg-gradient-to-br from-primary/20 to-purple-500/20 flex items-center justify-center text-xs font-bold text-white border border-white/10 shrink-0"> - {profile.username.substring(0, 2).toUpperCase()} - </div> - <div className="min-w-0"> - <div className="text-sm font-medium text-zinc-200 group-hover:text-white transition-colors truncate"> - {profile.username} - </div> - <div className="flex gap-2 mt-0.5"> - {/* Status indicators */} - <div className={`flex items-center gap-1 text-[10px] ${profile.connected.includes('tiktok') ? 'text-zinc-300' : 'text-zinc-600'}`}> - <TikTokIcon size={10} /> - </div> - <div className={`flex items-center gap-1 text-[10px] ${profile.connected.includes('instagram') ? 'text-pink-400' : 'text-zinc-600'}`}> - <Instagram size={10} /> - </div> - <div className={`flex items-center gap-1 text-[10px] ${profile.connected.includes('youtube') ? 
'text-red-400' : 'text-zinc-600'}`}> - <Youtube size={10} /> - </div> - </div> - </div> - </div> - {selectedUserId === profile.username && <Check size={14} className="text-primary shrink-0" />} - </button> - ))} - </div> - </div> - )} - </div> - ); -}; - -const SESSION_KEY = 'openshorts_session'; -const SESSION_MAX_AGE = 3600000; // 1 hour (matches server job retention) - -// Mock polling function -const pollJob = async (jobId) => { - const res = await fetch(getApiUrl(`/api/status/${jobId}`)); - if (!res.ok) throw new Error('Status check failed'); - return res.json(); -}; - -function App() { - const [apiKey, setApiKey] = useState(localStorage.getItem('gemini_key') || ''); - // Social API State - Load encrypted or plain - const [uploadPostKey, setUploadPostKey] = useState(() => { - const stored = localStorage.getItem('uploadPostKey_v3'); - if (stored) return decrypt(stored); - return ''; - }); - // ElevenLabs API State - Load encrypted - const [elevenLabsKey, setElevenLabsKey] = useState(() => { - const stored = localStorage.getItem('elevenLabsKey_v1'); - if (stored) return decrypt(stored); - return ''; - }); - - // fal.ai API State - Load encrypted - const [falKey, setFalKey] = useState(() => { - const stored = localStorage.getItem('falKey_v1'); - if (stored) return decrypt(stored); - return ''; - }); - - const [uploadUserId, setUploadUserId] = useState(() => localStorage.getItem('uploadUserId') || ''); - const [userProfiles, setUserProfiles] = useState([]); // List of {username, connected: []} - const [showKeyModal, setShowKeyModal] = useState(false); - const [jobId, setJobId] = useState(null); - const [status, setStatus] = useState('idle'); // idle, processing, complete, error - const [results, setResults] = useState(null); - const [logs, setLogs] = useState([]); - const [logsVisible, setLogsVisible] = useState(true); - const [processingMedia, setProcessingMedia] = useState(null); - const [activeTab, setActiveTab] = useState('dashboard'); // dashboard, settings - 
- const [sessionRecovered, setSessionRecovered] = useState(false); - const [showScheduleWeek, setShowScheduleWeek] = useState(false); - - // Sync state for original video playback - const [syncedTime, setSyncedTime] = useState(0); - const [isSyncedPlaying, setIsSyncedPlaying] = useState(false); - const [syncTrigger, setSyncTrigger] = useState(0); - - const handleClipPlay = (startTime) => { - setSyncedTime(startTime); - setIsSyncedPlaying(true); - setSyncTrigger(prev => prev + 1); - }; - - const handleClipPause = () => { - setIsSyncedPlaying(false); - }; - - // Session Recovery: Restore on mount - useEffect(() => { - try { - const saved = localStorage.getItem(SESSION_KEY); - if (!saved) return; - const session = JSON.parse(saved); - if (Date.now() - session.timestamp > SESSION_MAX_AGE) { - localStorage.removeItem(SESSION_KEY); - return; - } - if (session.jobId && session.status && session.status !== 'idle') { - setJobId(session.jobId); - setResults(session.results || null); - if (session.processingMedia) setProcessingMedia(session.processingMedia); - if (session.activeTab) setActiveTab(session.activeTab); - // If was processing, resume polling; if complete/error, just show results - setStatus(session.status === 'processing' ? 'processing' : session.status); - setSessionRecovered(true); - setTimeout(() => setSessionRecovered(false), 5000); - } - } catch (e) { - localStorage.removeItem(SESSION_KEY); - } - }, []); - - // Session Recovery: Save state changes - useEffect(() => { - if (status === 'idle') { - localStorage.removeItem(SESSION_KEY); - return; - } - try { - const sessionData = { - jobId, - status, - results, - processingMedia: processingMedia?.type === 'url' ? 
processingMedia : null, - activeTab, - timestamp: Date.now() - }; - localStorage.setItem(SESSION_KEY, JSON.stringify(sessionData)); - } catch (e) { - // localStorage full or serialization error - ignore - } - }, [jobId, status, results, activeTab]); - - useEffect(() => { - // Encrypt Gemini Key too for consistency if desired, but user asked specifically about Social integration not saving well. - // For now keeping gemini plain for compatibility unless requested. - if (apiKey) localStorage.setItem('gemini_key', apiKey); - }, [apiKey]); - - useEffect(() => { - if (uploadPostKey) { - localStorage.setItem('uploadPostKey_v3', encrypt(uploadPostKey)); - } - if (uploadUserId) { - localStorage.setItem('uploadUserId', uploadUserId); - } - }, [uploadPostKey, uploadUserId]); - - useEffect(() => { - if (elevenLabsKey) { - localStorage.setItem('elevenLabsKey_v1', encrypt(elevenLabsKey)); - } - }, [elevenLabsKey]); - - useEffect(() => { - if (falKey) { - localStorage.setItem('falKey_v1', encrypt(falKey)); - } - }, [falKey]); - - useEffect(() => { - if (uploadPostKey && userProfiles.length === 0) { - fetchUserProfiles(); - } - }, [uploadPostKey]); - - useEffect(() => { - let interval; - if ((status === 'processing' || status === 'completed') && jobId) { - interval = setInterval(async () => { - try { - const data = await pollJob(jobId); - console.log("Job status:", data); - - // Update results if available (real-time) - if (data.result) { - setResults(data.result); - } - - if (data.status === 'completed') { - setStatus('complete'); - clearInterval(interval); - } else if (data.status === 'failed') { - setStatus('error'); - const errorMsg = data.error || (data.logs && data.logs.length > 0 ? 
data.logs[data.logs.length - 1] : "Process failed"); - setLogs(prev => [...prev, "Error: " + errorMsg]); - clearInterval(interval); - } else { - // Update logs if available - if (data.logs) setLogs(data.logs); - } - } catch (e) { - console.error("Polling error", e); - } - }, 2000); - } - return () => clearInterval(interval); - }, [status, jobId]); - - - const fetchUserProfiles = async () => { - if (!uploadPostKey) return; - try { - const res = await fetch(getApiUrl('/api/social/user'), { - headers: { 'X-Upload-Post-Key': uploadPostKey } - }); - if (!res.ok) throw new Error("Failed to fetch"); - const data = await res.json(); - if (data.profiles && data.profiles.length > 0) { - setUserProfiles(data.profiles); - // Auto select first if none selected - if (!uploadUserId) { - setUploadUserId(data.profiles[0].username); - } - } else { - alert("No profiles found for this API Key."); - } - } catch (e) { - alert("Error fetching User Profiles. Please check key."); - console.error(e); - } - }; - - const handleProcess = async (data) => { - if (!apiKey || !uploadPostKey) { - setShowKeyModal(true); - return; - } - setStatus('processing'); - setLogs(["Starting process..."]); - setResults(null); - setProcessingMedia(data); - - try { - let body; - const headers = { 'X-Gemini-Key': apiKey }; - - if (data.type === 'url') { - headers['Content-Type'] = 'application/json'; - body = JSON.stringify({ url: data.payload, acknowledged: !!data.acknowledged }); - } else { - const formData = new FormData(); - formData.append('file', data.payload); - formData.append('acknowledged', data.acknowledged ? 'true' : 'false'); - body = formData; - } - - const res = await fetch(getApiUrl('/api/process'), { - method: 'POST', - headers: data.type === 'url' ? 
headers : { 'X-Gemini-Key': apiKey }, - body - }); - - if (!res.ok) throw new Error(await res.text()); - const resData = await res.json(); - setJobId(resData.job_id); - - } catch (e) { - setStatus('error'); - setLogs(l => [...l, `Error starting job: ${e.message}`]); - } - }; - - const handleReset = () => { - setStatus('idle'); - setJobId(null); - setResults(null); - setLogs([]); - setProcessingMedia(null); - localStorage.removeItem(SESSION_KEY); - }; - - // --- UI Components --- - - const Sidebar = () => ( - <div className="w-20 lg:w-64 bg-surface border-r border-white/5 flex flex-col h-full shrink-0 transition-all duration-300"> - <div className="p-6 flex items-center gap-3"> - <div className="w-8 h-8 bg-white/5 rounded-lg flex items-center justify-center shrink-0 overflow-hidden border border-white/5"> - <img src="/logo-openshorts.png" alt="Logo" className="w-full h-full object-cover" /> - </div> - <span className="font-bold text-lg text-white hidden lg:block tracking-tight">OpenShorts</span> - </div> - - <nav className="flex-1 px-4 py-4 space-y-2"> - <button - onClick={() => setActiveTab('dashboard')} - className={`w-full flex items-center gap-3 px-3 py-3 rounded-xl transition-colors ${activeTab === 'dashboard' ? 'bg-primary/10 text-primary' : 'text-zinc-400 hover:text-white hover:bg-white/5'}`} - > - <LayoutDashboard size={20} /> - <span className="font-medium hidden lg:block">Clip Generator</span> - </button> - - <button - onClick={() => setActiveTab('saasshorts')} - className={`w-full flex items-center gap-3 px-3 py-3 rounded-xl transition-colors ${activeTab === 'saasshorts' ? 'bg-violet-500/10 text-violet-400' : 'text-zinc-400 hover:text-white hover:bg-white/5'}`} - > - <Sparkles size={20} /> - <span className="font-medium hidden lg:block">AI Shorts</span> - </button> - - <button - onClick={() => setActiveTab('ai-agent')} - className={`w-full flex items-center gap-3 px-3 py-3 rounded-xl transition-colors ${activeTab === 'ai-agent' ? 
'bg-emerald-500/10 text-emerald-400' : 'text-zinc-400 hover:text-white hover:bg-white/5'}`} - > - <Bot size={20} /> - <span className="font-medium hidden lg:block">AI Agent</span> - </button> - - <button - onClick={() => setActiveTab('ugc-gallery')} - className={`w-full flex items-center gap-3 px-3 py-3 rounded-xl transition-colors ${activeTab === 'ugc-gallery' ? 'bg-violet-500/10 text-violet-400' : 'text-zinc-400 hover:text-white hover:bg-white/5'}`} - > - <LayoutGrid size={20} /> - <span className="font-medium hidden lg:block">UGC Gallery</span> - </button> - - <button - onClick={() => setActiveTab('thumbnails')} - className={`w-full flex items-center gap-3 px-3 py-3 rounded-xl transition-colors ${activeTab === 'thumbnails' ? 'bg-primary/10 text-primary' : 'text-zinc-400 hover:text-white hover:bg-white/5'}`} - > - <Image size={20} /> - <span className="font-medium hidden lg:block">YouTube Studio</span> - </button> - - {/* <button - onClick={() => setActiveTab('gallery')} - className={`w-full flex items-center gap-3 px-3 py-3 rounded-xl transition-colors ${activeTab === 'gallery' ? 'bg-primary/10 text-primary' : 'text-zinc-400 hover:text-white hover:bg-white/5'}`} - > - <LayoutGrid size={20} /> - <span className="font-medium hidden lg:block">Gallery</span> - </button> */} - - <button - onClick={() => setActiveTab('settings')} - className={`w-full flex items-center gap-3 px-3 py-3 rounded-xl transition-colors ${activeTab === 'settings' ? 
'bg-primary/10 text-primary' : 'text-zinc-400 hover:text-white hover:bg-white/5'}`} - > - <Settings size={20} /> - <span className="font-medium hidden lg:block">Settings</span> - </button> - </nav> - - <div className="p-4 border-t border-white/5 space-y-2"> - <a - href="#" - onClick={(e) => { e.preventDefault(); localStorage.removeItem('openshorts_skip_landing'); window.location.hash = ''; window.location.reload(); }} - className="flex items-center gap-2 p-3 bg-white/5 hover:bg-white/10 rounded-xl transition-colors group" - > - <div className="w-8 h-8 rounded-full bg-primary/20 text-primary flex items-center justify-center shrink-0"> - <Globe size={16} /> - </div> - <div className="hidden lg:block overflow-hidden"> - <p className="text-sm font-bold text-white leading-none mb-0.5">Landing Page</p> - <p className="text-[10px] text-zinc-400 group-hover:text-zinc-300 transition-colors truncate">View website</p> - </div> - </a> - <a - href="https://github.com/mutonby/openshorts" - target="_blank" - rel="noopener noreferrer" - className="flex items-center gap-2 p-3 bg-white/5 hover:bg-white/10 rounded-xl transition-colors group" - > - <div className="w-8 h-8 rounded-full bg-white text-black flex items-center justify-center shrink-0"> - <svg height="20" viewBox="0 0 16 16" version="1.1" width="20" aria-hidden="true"><path fillRule="evenodd" d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0016 8c0-4.42-3.58-8-8-8z"></path></svg> - </div> - <div className="hidden lg:block 
overflow-hidden"> - <p className="text-sm font-bold text-white leading-none mb-0.5">Open Source</p> - <p className="text-[10px] text-zinc-400 group-hover:text-zinc-300 transition-colors truncate">Free & Community Driven</p> - </div> - </a> - </div> - </div> - ); - +import { Navigate, Route, Routes } from 'react-router-dom'; +import AppShell from './layouts/AppShell.jsx'; +import Dashboard from './pages/Dashboard.jsx'; +import ShortForm from './pages/ShortForm.jsx'; +import LongForm from './pages/LongForm.jsx'; +import ClipGenerator from './pages/ClipGenerator.jsx'; +import Settings from './pages/Settings.jsx'; +import LegacySaaSShorts from './pages/Legacy/SaaSShorts.jsx'; +import LegacyThumbnails from './pages/Legacy/Thumbnails.jsx'; +import LegacyUGCGallery from './pages/Legacy/UGCGalleryPage.jsx'; +import LegacyAIAgent from './pages/Legacy/AIAgent.jsx'; + +export default function App() { return ( - <div className="flex h-screen bg-background overflow-hidden selection:bg-primary/30"> - <Sidebar /> - - <main className="flex-1 flex flex-col h-full overflow-hidden relative"> - {/* Background Gradients */} - <div className="absolute inset-0 overflow-hidden -z-10 pointer-events-none"> - <div className="absolute -top-[10%] -right-[10%] w-[50%] h-[50%] bg-primary/5 rounded-full blur-[120px]" /> - </div> - - {/* Top Header */} - <header className="h-16 border-b border-white/5 bg-background/50 backdrop-blur-md flex items-center justify-between px-6 shrink-0 z-10"> - <div className="flex items-center gap-4"> - {status !== 'idle' && ( - <button - onClick={handleReset} - className="flex items-center gap-2 text-sm text-zinc-400 hover:text-white transition-colors" - > - <PlusCircle size={16} /> - <span className="hidden sm:inline">New Project</span> - </button> - )} - </div> - - <div className="flex items-center gap-4"> - {userProfiles.length > 0 && ( - <UserProfileSelector - profiles={userProfiles} - selectedUserId={uploadUserId} - onSelect={setUploadUserId} - /> - )} - - 
{(!apiKey || !uploadPostKey) && ( - <button - onClick={() => setActiveTab('settings')} - className="text-xs text-amber-400 bg-amber-500/10 hover:bg-amber-500/20 px-3 py-1 rounded-full border border-amber-500/30 transition-colors flex items-center gap-1.5" - title="Click to configure your API keys" - > - <AlertTriangle size={12} /> - {!apiKey && !uploadPostKey - ? 'Gemini & Upload-Post keys missing' - : !apiKey - ? 'Gemini API Key Missing' - : 'Upload-Post API Key Missing'} - </button> - )} - </div> - </header> - - {/* Persistent Missing Keys Banner — visible on every screen */} - {(!apiKey || !uploadPostKey) && activeTab !== 'settings' && ( - <div className="mx-6 mt-3 p-3 bg-amber-500/10 border border-amber-500/30 rounded-xl flex items-center justify-between gap-4 shrink-0 animate-[fadeIn_0.3s_ease-out]"> - <div className="flex items-center gap-3 text-sm text-amber-200"> - <KeyRound size={16} className="shrink-0 text-amber-400" /> - <div> - <span className="font-semibold">Required API keys missing.</span>{' '} - <span className="text-amber-200/80"> - {!apiKey && !uploadPostKey - ? 'Set your Gemini and Upload-Post API keys to use OpenShorts.' - : !apiKey - ? 'Set your Gemini API key to use OpenShorts.' 
- : 'Set your Upload-Post API key to use OpenShorts.'} - </span> - </div> - </div> - <button - onClick={() => setActiveTab('settings')} - className="shrink-0 text-xs font-medium px-3 py-1.5 rounded-lg bg-amber-500 hover:bg-amber-400 text-black transition-colors" - > - Go to Settings - </button> - </div> - )} - - {/* Session Recovery Banner */} - {sessionRecovered && ( - <div className="mx-6 mt-2 p-3 bg-primary/10 border border-primary/20 rounded-xl flex items-center justify-between animate-[fadeIn_0.3s_ease-out] shrink-0"> - <div className="flex items-center gap-2 text-sm text-primary"> - <RotateCcw size={16} /> - <span className="font-medium">Session recovered</span> - <span className="text-zinc-400 text-xs">Your previous work has been restored.</span> - </div> - <button onClick={() => setSessionRecovered(false)} className="text-zinc-500 hover:text-white transition-colors"> - <X size={14} /> - </button> - </div> - )} - - {/* Main Workspace */} - <div className="flex-1 overflow-hidden relative"> - - {/* View: Settings */} - {activeTab === 'settings' && ( - <div className="h-full overflow-y-auto p-8 max-w-2xl mx-auto animate-[fadeIn_0.3s_ease-out]"> - <div className="flex items-center justify-between mb-8"> - <h1 className="text-2xl font-bold">Settings</h1> - <div className="px-3 py-1 bg-green-500/10 border border-green-500/20 rounded-full text-[10px] text-green-400 font-medium flex items-center gap-2"> - <Shield size={12} /> Privacy: keys only live in your browser (sent to backend just to process) - </div> - </div> - <KeyInput onKeySet={setApiKey} savedKey={apiKey} /> - - <div className={`glass-panel p-6 mt-8 ${!uploadPostKey ? 
'border-amber-500/30 ring-1 ring-amber-500/20' : ''}`}> - <div className="flex items-center justify-between mb-4"> - <h2 className="text-lg font-semibold">Social Integration</h2> - <span className="text-[10px] bg-amber-500/10 border border-amber-500/30 px-2 py-0.5 rounded text-amber-400 uppercase tracking-wider">Required</span> - </div> - <p className="text-xs text-zinc-500 mb-6 leading-relaxed"> - Required to publish your clips to TikTok, Instagram Reels, and YouTube Shorts via <strong>Upload-Post</strong>. - Includes a <strong>free tier</strong> (no credit card required). - </p> - <div className="space-y-4"> - <label className="block text-sm text-zinc-400">Upload-Post API Key</label> - <div className="flex gap-2"> - <input - type="password" - value={uploadPostKey} - onChange={(e) => setUploadPostKey(e.target.value)} - className="input-field" - placeholder="ey..." - /> - <button onClick={fetchUserProfiles} className="btn-primary py-2 px-4 text-sm"> - Connect - </button> - </div> - <p className="text-xs text-zinc-500 leading-relaxed"> - Connect your Upload-Post account to enable one-click publishing. - <div className="mt-3 grid grid-cols-1 sm:grid-cols-3 gap-2"> - <a href="https://app.upload-post.com/login" target="_blank" rel="noopener noreferrer" className="p-2 border border-white/5 rounded-lg hover:bg-white/5 transition-colors flex flex-col gap-1"> - <span className="text-zinc-400 font-medium">1. Login</span> - <span className="text-[10px] text-zinc-600">Register account</span> - </a> - <a href="https://app.upload-post.com/manage-users" target="_blank" rel="noopener noreferrer" className="p-2 border border-white/5 rounded-lg hover:bg-white/5 transition-colors flex flex-col gap-1"> - <span className="text-zinc-400 font-medium">2. 
Profiles</span> - <span className="text-[10px] text-zinc-600">Create & Connect</span> - </a> - <a href="https://app.upload-post.com/api-keys" target="_blank" rel="noopener noreferrer" className="p-2 border border-white/5 rounded-lg hover:bg-white/5 transition-colors flex flex-col gap-1"> - <span className="text-zinc-400 font-medium">3. API Key</span> - <span className="text-[10px] text-zinc-600">Generate key</span> - </a> - </div> - <br /> - <span className="text-zinc-600 italic"> - Keys are only stored in your browser. They are sent to the backend only to process your request, never stored server-side. - </span> - </p> - </div> - </div> - - <div className="glass-panel p-6 mt-8"> - <div className="flex items-center justify-between mb-4"> - <h2 className="text-lg font-semibold">Video Translation</h2> - <span className="text-[10px] bg-white/5 border border-white/5 px-2 py-0.5 rounded text-zinc-500 uppercase tracking-wider">Optional</span> - </div> - <p className="text-xs text-zinc-500 mb-6 leading-relaxed"> - Translate your clips to different languages using <strong>ElevenLabs</strong> AI dubbing. - Automatically translates speech while preserving the original voice characteristics. - </p> - <div className="space-y-4"> - <label className="block text-sm text-zinc-400">ElevenLabs API Key</label> - <div className="flex gap-2"> - <input - type="password" - value={elevenLabsKey} - onChange={(e) => setElevenLabsKey(e.target.value)} - className="input-field" - placeholder="sk_..." - /> - <button - onClick={() => { - if (elevenLabsKey) { - localStorage.setItem('elevenLabsKey_v1', encrypt(elevenLabsKey)); - alert('ElevenLabs API Key saved!'); - } - }} - className="btn-primary py-2 px-4 text-sm" - > - Save - </button> - </div> - <p className="text-xs text-zinc-500 leading-relaxed"> - Get your API key from ElevenLabs to enable video translation. 
- <div className="mt-3 grid grid-cols-1 sm:grid-cols-2 gap-2"> - <a href="https://elevenlabs.io/sign-up" target="_blank" rel="noopener noreferrer" className="p-2 border border-white/5 rounded-lg hover:bg-white/5 transition-colors flex flex-col gap-1"> - <span className="text-zinc-400 font-medium">1. Sign Up</span> - <span className="text-[10px] text-zinc-600">Create account</span> - </a> - <a href="https://elevenlabs.io/app/settings/api-keys" target="_blank" rel="noopener noreferrer" className="p-2 border border-white/5 rounded-lg hover:bg-white/5 transition-colors flex flex-col gap-1"> - <span className="text-zinc-400 font-medium">2. API Key</span> - <span className="text-[10px] text-zinc-600">Generate key</span> - </a> - </div> - <br /> - <span className="text-zinc-600 italic"> - Keys are only stored in your browser. They are sent to the backend only to process your request, never stored server-side. - </span> - </p> - </div> - </div> - - <div className="glass-panel p-6 mt-8"> - <div className="flex items-center justify-between mb-4"> - <h2 className="text-lg font-semibold">AI Shorts (UGC Videos)</h2> - <span className="text-[10px] bg-violet-500/10 border border-violet-500/20 px-2 py-0.5 rounded text-violet-400 uppercase tracking-wider">New</span> - </div> - <p className="text-xs text-zinc-500 mb-6 leading-relaxed"> - Generate UGC-style videos with AI actors for any product or business using <strong>fal.ai</strong>. - Just describe your product or paste a URL. Requires fal.ai + ElevenLabs API keys. - </p> - <div className="space-y-4"> - <label className="block text-sm text-zinc-400">fal.ai API Key</label> - <div className="flex gap-2"> - <input - type="password" - value={falKey} - onChange={(e) => setFalKey(e.target.value)} - className="input-field" - placeholder="fal_..." 
- /> - <button - onClick={() => { - if (falKey) { - localStorage.setItem('falKey_v1', encrypt(falKey)); - alert('fal.ai API Key saved!'); - } - }} - className="btn-primary py-2 px-4 text-sm" - > - Save - </button> - </div> - <p className="text-xs text-zinc-500 leading-relaxed"> - Get your API key from fal.ai to enable AI actor video generation. - <div className="mt-3 grid grid-cols-1 sm:grid-cols-2 gap-2"> - <a href="https://fal.ai/dashboard/keys" target="_blank" rel="noopener noreferrer" className="p-2 border border-white/5 rounded-lg hover:bg-white/5 transition-colors flex flex-col gap-1"> - <span className="text-zinc-400 font-medium">1. Sign Up</span> - <span className="text-[10px] text-zinc-600">Create fal.ai account</span> - </a> - <a href="https://fal.ai/dashboard/keys" target="_blank" rel="noopener noreferrer" className="p-2 border border-white/5 rounded-lg hover:bg-white/5 transition-colors flex flex-col gap-1"> - <span className="text-zinc-400 font-medium">2. API Key</span> - <span className="text-[10px] text-zinc-600">Generate key</span> - </a> - </div> - <br /> - <span className="text-zinc-600 italic"> - Keys are only stored in your browser. Sent to backend only to process requests. 
- </span> - </p> - </div> - </div> - </div> - )} - - {/* View: SaaS Shorts */} - {activeTab === 'saasshorts' && ( - <SaaShortsTab geminiApiKey={apiKey} elevenLabsKey={elevenLabsKey} falKey={falKey} uploadPostKey={uploadPostKey} uploadUserId={uploadUserId} /> - )} - - {/* View: AI Agent */} - {activeTab === 'ai-agent' && ( - <div className="h-full overflow-y-auto custom-scrollbar p-6 md:p-10 animate-[fadeIn_0.3s_ease-out]"> - <div className="max-w-4xl mx-auto space-y-8"> - - {/* Header */} - <div className="space-y-3"> - <div className="inline-flex items-center gap-2 px-3 py-1 rounded-full bg-emerald-500/10 border border-emerald-500/30 text-[11px] uppercase tracking-wider text-emerald-400 font-semibold"> - <Bot size={12} /> Autonomous Skill - </div> - <h1 className="text-3xl md:text-4xl font-black bg-gradient-to-b from-white to-white/60 bg-clip-text text-transparent"> - Your Personal Clipping Team - </h1> - <p className="text-zinc-400 text-base md:text-lg leading-relaxed max-w-2xl"> - Drop your videos in a folder and a team of AI clippers picks the viral moments, edits them, and queues them for your approval — like having a 24/7 short-form editing crew on autopilot. - </p> - </div> - - {/* Mobile-format warning */} - <div className="p-4 rounded-xl border border-amber-500/30 bg-amber-500/10 flex items-start gap-3"> - <Smartphone size={20} className="text-amber-400 shrink-0 mt-0.5" /> - <div className="text-sm text-amber-100"> - <p className="font-semibold text-amber-300 mb-1">Upload videos already in vertical (9:16) mobile format.</p> - <p className="text-amber-100/80 leading-relaxed"> - The agent does not reframe horizontal footage. Make sure every source video is shot or pre-cropped to mobile/portrait format before dropping it into the input folder. 
- </p> - </div> - </div> - - {/* Workflow */} - <div className="grid md:grid-cols-3 gap-4"> - <div className="glass-panel p-5 space-y-2"> - <div className="w-10 h-10 rounded-lg bg-emerald-500/10 text-emerald-400 flex items-center justify-center"> - <Upload size={18} /> - </div> - <h3 className="font-semibold text-white">1. Drop your videos</h3> - <p className="text-xs text-zinc-400 leading-relaxed"> - Put your long-form vertical footage in the watched folder. The skill picks one video per run. - </p> - </div> - - <div className="glass-panel p-5 space-y-2"> - <div className="w-10 h-10 rounded-lg bg-emerald-500/10 text-emerald-400 flex items-center justify-center"> - <Users size={18} /> - </div> - <h3 className="font-semibold text-white">2. AI clippers work</h3> - <p className="text-xs text-zinc-400 leading-relaxed"> - Whisper transcribes, Gemini 3 Flash spots viral beats, FFmpeg cuts each clip and adds a hook overlay. - </p> - </div> - - <div className="glass-panel p-5 space-y-2"> - <div className="w-10 h-10 rounded-lg bg-emerald-500/10 text-emerald-400 flex items-center justify-center"> - <CheckCircle2 size={18} /> - </div> - <h3 className="font-semibold text-white">3. You validate, it ships</h3> - <p className="text-xs text-zinc-400 leading-relaxed"> - Approve the candidates you like and the skill auto-publishes them to TikTok, Reels and YouTube Shorts via Upload-Post. - </p> - </div> - </div> - - {/* Repo CTA */} - <div className="glass-panel p-6 md:p-8 space-y-5"> - <div className="flex items-start justify-between gap-4 flex-wrap"> - <div> - <h2 className="text-xl font-bold text-white mb-1">skill-autoshorts</h2> - <p className="text-sm text-zinc-400"> - The Claude Code skill that powers this workflow. Install it once and trigger it whenever you want a fresh batch of clips. 
- </p> - </div> - <a - href="https://github.com/mutonby/skill-autoshorts" - target="_blank" - rel="noopener noreferrer" - className="btn-primary py-2 px-4 text-sm flex items-center gap-2 shrink-0" - > - View on GitHub <ExternalLink size={14} /> - </a> - </div> - - <div className="bg-[#0c0c0e] border border-white/10 rounded-lg p-4 font-mono text-xs text-zinc-300 flex items-center justify-between gap-3"> - <span className="truncate">git clone https://github.com/mutonby/skill-autoshorts</span> - <button - onClick={() => navigator.clipboard.writeText('git clone https://github.com/mutonby/skill-autoshorts')} - className="text-zinc-500 hover:text-white transition-colors shrink-0" - title="Copy" - > - <Copy size={14} /> - </button> - </div> - - <div className="grid sm:grid-cols-2 gap-3 text-sm"> - <div className="flex items-start gap-2 text-zinc-300"> - <Check size={16} className="text-emerald-400 shrink-0 mt-0.5" /> - <span>Daily batch — picks one long video per run</span> - </div> - <div className="flex items-start gap-2 text-zinc-300"> - <Check size={16} className="text-emerald-400 shrink-0 mt-0.5" /> - <span>Whisper transcription with word-level timing</span> - </div> - <div className="flex items-start gap-2 text-zinc-300"> - <Check size={16} className="text-emerald-400 shrink-0 mt-0.5" /> - <span>Gemini 3 Flash multimodal moment detection</span> - </div> - <div className="flex items-start gap-2 text-zinc-300"> - <Check size={16} className="text-emerald-400 shrink-0 mt-0.5" /> - <span>Auto-publish to TikTok, Reels & YouTube Shorts</span> - </div> - </div> - </div> - - </div> - </div> - )} - - {/* View: UGC Gallery */} - {activeTab === 'ugc-gallery' && ( - <UGCGallery /> - )} - - {/* View: Thumbnails */} - {activeTab === 'thumbnails' && ( - <ThumbnailStudio geminiApiKey={apiKey} uploadPostKey={uploadPostKey} uploadUserId={uploadUserId} /> - )} - - {/* View: Gallery */} - {/* {activeTab === 'gallery' && ( - <Gallery /> - )} */} - - {/* View: Dashboard (Idle) */} - 
{activeTab === 'dashboard' && status === 'idle' && ( - <div className="h-full flex flex-col items-center justify-center p-6 animate-[fadeIn_0.3s_ease-out]"> - <div className="max-w-xl w-full text-center space-y-8"> - <div className="space-y-4"> - <h1 className="text-4xl md:text-5xl font-black bg-gradient-to-b from-white to-white/60 bg-clip-text text-transparent"> - Create Viral Shorts - </h1> - <p className="text-zinc-400 text-lg"> - Drop your long-form video below to instantly generate viral clips with AI. - </p> - </div> - - <MediaInput onProcess={handleProcess} isProcessing={status === 'processing'} /> - - <div className="flex items-center justify-center gap-8 text-zinc-500 text-sm"> - <span className="flex items-center gap-2"><Youtube size={16} /> YouTube</span> - <span className="flex items-center gap-2"><Instagram size={16} /> Instagram</span> - <span className="flex items-center gap-2"><TikTokIcon size={16} /> TikTok</span> - </div> - </div> - </div> - )} - - {/* View: Processing / Results (Split View) */} - {activeTab === 'dashboard' && (status === 'processing' || status === 'complete' || status === 'error') && ( - <div className="h-full flex flex-col md:flex-row animate-[fadeIn_0.3s_ease-out]"> - - {/* Left Panel: Preview & Status */} - <div className={`${status === 'complete' ? 'w-full md:w-[30%] lg:w-[25%]' : 'w-full md:w-[55%] lg:w-[60%]'} h-full flex flex-col border-r border-white/5 bg-black/20 p-6 overflow-y-auto custom-scrollbar transition-all duration-700 ease-in-out`}> - <div className="mb-6 flex items-center justify-between"> - <h2 className="text-lg font-semibold flex items-center gap-2"> - <Activity className={`text-primary ${status === 'processing' ? 'animate-pulse' : ''}`} size={20} /> - Live Analysis - </h2> - <span className={`text-xs px-2 py-1 rounded-full border ${status === 'processing' ? 'bg-primary/10 border-primary/20 text-primary' : - status === 'complete' ? 
'bg-green-500/10 border-green-500/20 text-green-400' : - 'bg-red-500/10 border-red-500/20 text-red-400' - }`}> - {status.toUpperCase()} - </span> - </div> - - {/* Video Preview */} - {processingMedia && ( - <ProcessingAnimation - media={processingMedia} - isComplete={status === 'complete'} - syncedTime={syncedTime} - isSyncedPlaying={isSyncedPlaying} - syncTrigger={syncTrigger} - /> - )} - - {/* Logs Terminal */} - <div className={`bg-[#0c0c0e] rounded-xl border border-white/10 overflow-hidden flex flex-col transition-all duration-500 ${status === 'complete' ? 'h-32 min-h-0 opacity-50 hover:opacity-100' : 'flex-1 min-h-[200px]'}`}> - <div className="px-4 py-2 border-b border-white/5 flex items-center justify-between bg-white/5 shrink-0"> - <span className="text-xs font-mono text-zinc-400 flex items-center gap-2"> - <Terminal size={12} /> System Logs - </span> - <button onClick={() => setLogsVisible(!logsVisible)} className="text-zinc-500 hover:text-white transition-colors"> - {logsVisible ? <ChevronDown size={14} /> : <ChevronDown size={14} className="rotate-180" />} - </button> - </div> - {logsVisible && ( - <div className="flex-1 p-4 overflow-y-auto font-mono text-xs space-y-1.5 custom-scrollbar text-zinc-400"> - {logs.map((log, i) => ( - <div key={i} className={`flex gap-2 ${log.toLowerCase().includes('error') ? 'text-red-400' : 'text-zinc-400'}`}> - <span className="text-zinc-700 shrink-0">{new Date().toLocaleTimeString()}</span> - <span>{log}</span> - </div> - ))} - {status === 'processing' && ( - <div className="animate-pulse text-primary/70">_</div> - )} - </div> - )} - </div> - </div> - - {/* Right Panel: Results Grid */} - <div className={`${status === 'complete' ? 
'w-full md:w-[70%] lg:w-[75%]' : 'w-full md:w-[45%] lg:w-[40%]'} h-full flex flex-col bg-background p-6 transition-all duration-700 ease-in-out`}> - <h2 className="text-lg font-semibold mb-6 flex items-center gap-2 shrink-0"> - <Sparkles className="text-yellow-400" size={20} /> - Generated Shorts - {results?.clips?.length > 0 && ( - <span className="text-xs bg-white/10 text-white px-2 py-0.5 rounded-full ml-auto"> - {results.clips.length} Clips - </span> - )} - {results?.cost_analysis && ( - <span className="text-xs bg-green-500/10 border border-green-500/20 text-green-400 px-2 py-0.5 rounded-full ml-2" title={`Input: ${results.cost_analysis.input_tokens} | Output: ${results.cost_analysis.output_tokens}`}> - ${results.cost_analysis.total_cost.toFixed(5)} - </span> - )} - {results?.clips?.length > 1 && status === 'complete' && ( - <button - onClick={() => setShowScheduleWeek(true)} - className="ml-auto flex items-center gap-1.5 px-3 py-1.5 bg-gradient-to-r from-purple-500/20 to-indigo-500/20 hover:from-purple-500/30 hover:to-indigo-500/30 border border-purple-500/30 text-purple-300 hover:text-purple-200 rounded-full text-xs font-bold transition-all" - > - <Calendar size={14} /> - Programar Semana - </button> - )} - </h2> - - <div className="flex-1 overflow-y-auto custom-scrollbar p-1"> - {results && results.clips && results.clips.length > 0 ? ( - <div className={`grid gap-4 pb-10 ${status === 'complete' ? 'grid-cols-1 xl:grid-cols-2' : 'grid-cols-1'}`}> - {results.clips.map((clip, i) => ( - <ResultCard - key={i} - clip={clip} - index={i} - jobId={jobId} - uploadPostKey={uploadPostKey} - uploadUserId={uploadUserId} - geminiApiKey={apiKey} - elevenLabsKey={elevenLabsKey} - onPlay={(time) => handleClipPlay(time)} - onPause={handleClipPause} - /> - ))} - </div> - ) : ( - status === 'processing' ? 
( - <div className="h-full flex flex-col items-center justify-center text-zinc-500 space-y-4 opacity-50"> - <div className="w-12 h-12 rounded-full border-2 border-zinc-800 border-t-primary animate-spin" /> - <p className="text-sm">Waiting for clips...</p> - </div> - ) : status === 'error' ? ( - <div className="h-full flex flex-col items-center justify-center text-red-400 space-y-2"> - <p>Generation failed.</p> - </div> - ) : null - )} - </div> - </div> - - </div> - )} - - </div> - - </main> - - {/* Missing API Key Modal */} - {showKeyModal && ( - <div className="fixed inset-0 z-50 flex items-center justify-center bg-black/60 backdrop-blur-sm" onClick={() => setShowKeyModal(false)}> - <div className="bg-[#18181b] border border-white/10 rounded-2xl p-6 max-w-md w-full mx-4 space-y-4 shadow-2xl" onClick={(e) => e.stopPropagation()}> - <h2 className="text-lg font-bold text-white"> - {!apiKey && !uploadPostKey - ? 'Required API Keys Missing' - : !apiKey - ? 'Gemini API Key Required' - : 'Upload-Post API Key Required'} - </h2> - <p className="text-sm text-zinc-400"> - OpenShorts needs both a <strong className="text-zinc-200">Gemini</strong> API key and an <strong className="text-zinc-200">Upload-Post</strong> API key. Both have free tiers. - </p> - - {/* Gemini block */} - <div className={`rounded-lg p-4 space-y-2 border ${!apiKey ? 'bg-blue-500/5 border-blue-500/30' : 'bg-white/5 border-white/10 opacity-70'}`}> - <p className="text-xs font-semibold text-zinc-200 flex items-center gap-2"> - {apiKey ? 
<Check size={12} className="text-green-400" /> : <AlertTriangle size={12} className="text-amber-400" />} - Gemini API Key {apiKey && <span className="text-green-400">— set</span>} - </p> - {!apiKey && ( - <> - <ol className="text-xs text-zinc-400 space-y-1 list-decimal list-inside"> - <li>Go to <a href="https://aistudio.google.com/app/apikey" target="_blank" rel="noopener noreferrer" className="text-blue-400 underline">aistudio.google.com/app/apikey</a></li> - <li>Sign in with your Google account</li> - <li>Click "Create API Key"</li> - <li>Copy the key and paste it below</li> - </ol> - <input - type="text" - placeholder="Paste your Gemini API key here..." - className="w-full bg-black/50 border border-white/20 rounded-lg px-4 py-2.5 text-sm text-white placeholder-zinc-600 focus:outline-none focus:border-blue-500" - onKeyDown={(e) => { - if (e.key === 'Enter' && e.target.value.trim()) { - setApiKey(e.target.value.trim()); - } - }} - /> - </> - )} - </div> - - {/* Upload-Post block */} - <div className={`rounded-lg p-4 space-y-2 border ${!uploadPostKey ? 'bg-violet-500/5 border-violet-500/30' : 'bg-white/5 border-white/10 opacity-70'}`}> - <p className="text-xs font-semibold text-zinc-200 flex items-center gap-2"> - {uploadPostKey ? <Check size={12} className="text-green-400" /> : <AlertTriangle size={12} className="text-amber-400" />} - Upload-Post API Key {uploadPostKey && <span className="text-green-400">— set</span>} - </p> - {!uploadPostKey && ( - <> - <p className="text-xs text-zinc-400"> - Required to publish your clips to TikTok, Instagram Reels, and YouTube Shorts. Free tier available, no credit card needed. 
- </p> - <ol className="text-xs text-zinc-400 space-y-1 list-decimal list-inside"> - <li>Register at <a href="https://app.upload-post.com/login" target="_blank" rel="noopener noreferrer" className="text-violet-400 underline">app.upload-post.com</a></li> - <li>Connect your TikTok, Instagram, or YouTube accounts</li> - <li>Go to <a href="https://app.upload-post.com/api-keys" target="_blank" rel="noopener noreferrer" className="text-violet-400 underline">API Keys</a> and generate one</li> - <li>Paste it below</li> - </ol> - <input - type="text" - placeholder="Paste your Upload-Post API key here..." - className="w-full bg-black/50 border border-white/20 rounded-lg px-4 py-2.5 text-sm text-white placeholder-zinc-600 focus:outline-none focus:border-violet-500" - onKeyDown={(e) => { - if (e.key === 'Enter' && e.target.value.trim()) { - setUploadPostKey(e.target.value.trim()); - } - }} - /> - </> - )} - </div> - - <div className="flex gap-3"> - <button - onClick={() => setShowKeyModal(false)} - className="flex-1 text-sm text-zinc-400 py-2 rounded-lg border border-white/10 hover:bg-white/5 transition-colors" - > - Cancel - </button> - <button - onClick={() => { setShowKeyModal(false); setActiveTab('settings'); }} - className="flex-1 text-sm text-white py-2 rounded-lg bg-blue-600 hover:bg-blue-500 transition-colors font-medium" - > - Go to Settings - </button> - </div> - </div> - </div> - )} - - <ScheduleWeekModal - isOpen={showScheduleWeek} - onClose={() => setShowScheduleWeek(false)} - clips={results?.clips || []} - jobId={jobId} - uploadPostKey={uploadPostKey} - uploadUserId={uploadUserId} - /> - </div> + <Routes> + <Route element={<AppShell />}> + <Route index element={<Navigate to="/dashboard" replace />} /> + <Route path="dashboard" element={<Dashboard />} /> + <Route path="short-form/*" element={<ShortForm />} /> + <Route path="long-form/*" element={<LongForm />} /> + <Route path="clip-generator" element={<ClipGenerator />} /> + <Route path="settings/*" 
element={<Settings />} /> + <Route path="legacy/saasshorts" element={<LegacySaaSShorts />} /> + <Route path="legacy/thumbnails" element={<LegacyThumbnails />} /> + <Route path="legacy/ugc" element={<LegacyUGCGallery />} /> + <Route path="legacy/ai-agent" element={<LegacyAIAgent />} /> + <Route path="*" element={<Navigate to="/dashboard" replace />} /> + </Route> + </Routes> ); } - -export default App; diff --git a/frontend/src/hooks/useJobPolling.js b/frontend/src/hooks/useJobPolling.js new file mode 100644 index 00000000..623b95b3 --- /dev/null +++ b/frontend/src/hooks/useJobPolling.js @@ -0,0 +1,54 @@ +// Polls /api/status/{job_id} every 2s while a job is processing. +// Drives the Clip Generator UI — extracted verbatim from the App.jsx +// polling effect that lived around lines 267-298. + +import { useEffect } from 'react'; +import { getApiUrl } from '../config.js'; +import { getJob, updateJob, useJob } from '../state/jobStore.js'; + +async function pollJob(jobId) { + const res = await fetch(getApiUrl(`/api/status/${jobId}`)); + if (!res.ok) throw new Error('Status check failed'); + return res.json(); +} + +export function useJobPolling() { + const { status, jobId } = useJob(); + + useEffect(() => { + if (!(status === 'processing' || status === 'completed') || !jobId) return; + let cancelled = false; + const interval = setInterval(async () => { + try { + const data = await pollJob(jobId); + if (cancelled) return; + const patch = {}; + if (data.result) patch.results = data.result; + if (data.status === 'completed') { + patch.status = 'complete'; + updateJob(patch); + clearInterval(interval); + return; + } + if (data.status === 'failed') { + const errorMsg = data.error + || (data.logs && data.logs.length ? 
data.logs[data.logs.length - 1] : 'Process failed'); + patch.status = 'error'; + patch.logs = [...(getJob().logs || []), `Error: ${errorMsg}`]; + updateJob(patch); + clearInterval(interval); + return; + } + if (data.logs) patch.logs = data.logs; + if (Object.keys(patch).length) updateJob(patch); + } catch (e) { + console.error('Polling error', e); + } + }, 2000); + + return () => { + cancelled = true; + clearInterval(interval); + }; + }, [status, jobId]); +} diff --git a/frontend/src/layouts/AppShell.jsx b/frontend/src/layouts/AppShell.jsx new file mode 100644 index 00000000..afe43af9 --- /dev/null +++ b/frontend/src/layouts/AppShell.jsx @@ -0,0 +1,22 @@ +import { Outlet } from 'react-router-dom'; +import Sidebar from './Sidebar.jsx'; +import Header from './Header.jsx'; +import { useJobPolling } from '../hooks/useJobPolling.js'; + +export default function AppShell() { + // Keep the polling loop alive globally so a running job survives + // navigation between pages. + useJobPolling(); + + return ( + <div className="flex h-screen bg-background overflow-hidden selection:bg-primary/30"> + <Sidebar /> + <main className="flex-1 flex flex-col h-full overflow-hidden"> + <Header /> + <div className="flex-1 overflow-y-auto custom-scrollbar"> + <Outlet /> + </div> + </main> + </div> + ); +} diff --git a/frontend/src/layouts/Header.jsx b/frontend/src/layouts/Header.jsx new file mode 100644 index 00000000..614058f2 --- /dev/null +++ b/frontend/src/layouts/Header.jsx @@ -0,0 +1,52 @@ +import { useLocation, matchPath } from 'react-router-dom'; +import { Bell } from 'lucide-react'; + +const TITLE_RULES = [ + { pattern: '/dashboard', title: 'Dashboard' }, + { pattern: '/short-form/*', title: 'Short-form' }, + { pattern: '/short-form', title: 'Short-form' }, + { pattern: '/long-form/*', title: 'Long-form' }, + { pattern: '/long-form', title: 'Long-form' }, + { pattern: '/clip-generator', title: 'Clip Generator' }, + { pattern: '/settings/*', title: 'Settings' }, + { pattern: 
'/settings', title: 'Settings' }, + { pattern: '/legacy/saasshorts', title: 'Legacy · SaaS Shorts' }, + { pattern: '/legacy/thumbnails', title: 'Legacy · YouTube Studio' }, + { pattern: '/legacy/ugc', title: 'Legacy · UGC Gallery' }, + { pattern: '/legacy/ai-agent', title: 'Legacy · AI Agent' }, +]; + +function resolveTitle(pathname) { + for (const rule of TITLE_RULES) { + if (matchPath({ path: rule.pattern, end: rule.pattern.endsWith('*') ? false : true }, pathname)) { + return rule.title; + } + } + return 'OpenShorts'; +} + +export default function Header() { + const location = useLocation(); + const title = resolveTitle(location.pathname); + + // Phase 2 wires real notifications; for now show the bell with no badge. + const unread = 0; + + return ( + <header className="h-[50px] shrink-0 bg-background border-b border-border flex items-center justify-between px-6"> + <h1 className="text-[14px] font-medium text-white tracking-tight">{title}</h1> + <div className="flex items-center gap-2"> + <button + type="button" + className="relative w-8 h-8 flex items-center justify-center rounded-md text-zinc-400 hover:text-white hover:bg-white/5 transition-colors" + aria-label="Notifications" + > + <Bell size={16} /> + {unread > 0 && ( + <span className="absolute top-1.5 right-1.5 w-2 h-2 rounded-full bg-primary" /> + )} + </button> + </div> + </header> + ); +} diff --git a/frontend/src/layouts/Sidebar.jsx b/frontend/src/layouts/Sidebar.jsx new file mode 100644 index 00000000..5a8d61c2 --- /dev/null +++ b/frontend/src/layouts/Sidebar.jsx @@ -0,0 +1,42 @@ +import { NavLink } from 'react-router-dom'; +import { LayoutDashboard, Smartphone, Video, Scissors, Settings as SettingsIcon } from 'lucide-react'; + +const NAV = [ + { to: '/dashboard', label: 'Dashboard', icon: LayoutDashboard }, + { to: '/short-form', label: 'Short-form', icon: Smartphone }, + { to: '/long-form', label: 'Long-form', icon: Video }, + { to: '/clip-generator', label: 'Clip Generator', icon: Scissors }, + { 
to: '/settings', label: 'Settings', icon: SettingsIcon }, +]; + +export default function Sidebar() { + return ( + <aside className="w-[210px] shrink-0 bg-sidebar border-r border-border flex flex-col h-full"> + <div className="h-[50px] flex items-center gap-3 px-5 border-b border-border"> + <div className="w-7 h-7 rounded-md overflow-hidden bg-white/5 border border-border shrink-0"> + <img src="/logo-openshorts.png" alt="OpenShorts" className="w-full h-full object-cover" /> + </div> + <span className="text-[15px] font-semibold tracking-tight text-white">OpenShorts</span> + </div> + + <nav className="flex-1 px-3 py-4 space-y-1"> + {NAV.map(({ to, label, icon: Icon }) => ( + <NavLink + key={to} + to={to} + className={({ isActive }) => + `flex items-center gap-3 px-3 py-2 rounded-lg text-[13px] transition-colors ${ + isActive + ? 'bg-primary/15 text-primary' + : 'text-zinc-400 hover:text-white hover:bg-white/5' + }` + } + > + <Icon size={16} /> + <span className="font-medium">{label}</span> + </NavLink> + ))} + </nav> + </aside> + ); +} diff --git a/frontend/src/lib/crypto.js b/frontend/src/lib/crypto.js new file mode 100644 index 00000000..31488fea --- /dev/null +++ b/frontend/src/lib/crypto.js @@ -0,0 +1,35 @@ +// Lightweight client-side obfuscation for API keys stored in localStorage. +// XOR + Base64 with a salt. NOT real cryptography — just keeps casual +// browser-storage inspection from reading raw API keys. 
+ +const SECRET_KEY = import.meta.env.VITE_ENCRYPTION_KEY || 'OpenShorts-Static-Salt-Change-Me'; +const ENCRYPTION_PREFIX = 'ENC:'; + +export function encrypt(text) { + if (!text) return ''; + try { + const xor = text.split('').map((c, i) => + String.fromCharCode(c.charCodeAt(0) ^ SECRET_KEY.charCodeAt(i % SECRET_KEY.length)) + ).join(''); + return ENCRYPTION_PREFIX + btoa(xor); + } catch (e) { + console.error('Encryption failed', e); + return text; + } +} + +export function decrypt(text) { + if (!text) return ''; + if (text.startsWith(ENCRYPTION_PREFIX)) { + try { + const raw = text.slice(ENCRYPTION_PREFIX.length); + const xor = atob(raw); + return xor.split('').map((c, i) => + String.fromCharCode(c.charCodeAt(0) ^ SECRET_KEY.charCodeAt(i % SECRET_KEY.length)) + ).join(''); + } catch (e) { + return ''; + } + } + return text; +} diff --git a/frontend/src/main.jsx b/frontend/src/main.jsx index 7bd9579f..819b82b6 100644 --- a/frontend/src/main.jsx +++ b/frontend/src/main.jsx @@ -1,5 +1,6 @@ import { StrictMode, useState, useEffect } from 'react' import { createRoot } from 'react-dom/client' +import { HashRouter } from 'react-router-dom' import './index.css' import App from './App.jsx' import Landing from './Landing.jsx' @@ -9,7 +10,14 @@ function Root() { const resolveView = () => { const hash = window.location.hash; if (hash === '#legal') return 'legal'; - if (hash === '#app' || localStorage.getItem('openshorts_skip_landing') === '1') return 'app'; + // `#app` (Landing's launch hash) or `#/...` (HashRouter paths) both mean + // we're inside the app. skip-landing keeps users out of Landing once + // they've launched at least once. 
+ if ( + hash === '#app' + || hash.startsWith('#/') + || localStorage.getItem('openshorts_skip_landing') === '1' + ) return 'app'; return 'landing'; }; @@ -28,7 +36,11 @@ function Root() { }; if (view === 'legal') return <Legal />; - if (view === 'app') return <App />; + if (view === 'app') return ( + <HashRouter> + <App /> + </HashRouter> + ); return <Landing onLaunchApp={handleLaunchApp} />; } diff --git a/frontend/src/pages/ClipGenerator.jsx b/frontend/src/pages/ClipGenerator.jsx new file mode 100644 index 00000000..6baa0b0f --- /dev/null +++ b/frontend/src/pages/ClipGenerator.jsx @@ -0,0 +1,294 @@ +// Clip Generator — the existing /api/process workflow. Phase 1 carves this +// out of the old App.jsx tab body. Phase 2 will refactor to consume the +// extracted job + keys stores instead of local state. + +import { useEffect, useState } from 'react'; +import { + Activity, AlertTriangle, Calendar, ChevronDown, Instagram, KeyRound, + PlusCircle, RotateCcw, Sparkles, Terminal, X, Youtube, +} from 'lucide-react'; +import MediaInput from '../components/MediaInput'; +import ResultCard from '../components/ResultCard'; +import ProcessingAnimation from '../components/ProcessingAnimation'; +import ScheduleWeekModal from '../components/ScheduleWeekModal'; +import { getApiUrl } from '../config'; +import { useKeys } from '../state/keysStore.js'; +import { + getJob, recoverJob, resetJob, triggerSyncedPause, triggerSyncedPlay, updateJob, useJob, +} from '../state/jobStore.js'; + +const TikTokIcon = ({ size = 16, className = '' }) => ( + <svg width={size} height={size} viewBox="0 0 24 24" fill="currentColor" className={className}> + <path d="M19.589 6.686a4.793 4.793 0 0 1-3.77-4.245V2h-3.445v13.672a2.896 2.896 0 0 1-5.201 1.743l-.002-.001.002.001a2.895 2.895 0 0 1 3.183-4.51v-3.5a6.329 6.329 0 0 0-5.394 10.692 6.33 6.33 0 0 0 10.857-4.424V8.687a8.182 8.182 0 0 0 4.773 1.526V6.79a4.831 4.831 0 0 1-1.003-.104z" /> + </svg> +); + +export default function ClipGenerator() { + const 
keys = useKeys(); + const job = useJob(); + const { + jobId, status, results, logs, processingMedia, + syncedTime, isSyncedPlaying, syncTrigger, sessionRecovered, + } = job; + + const [logsVisible, setLogsVisible] = useState(true); + const [showScheduleWeek, setShowScheduleWeek] = useState(false); + const [showKeyModal, setShowKeyModal] = useState(false); + + useEffect(() => { + if (status === 'idle') recoverJob(); + }, []); + + const handleProcess = async (data) => { + if (!keys.gemini || !keys.uploadPost) { + setShowKeyModal(true); + return; + } + updateJob({ + status: 'processing', + logs: ['Starting process...'], + results: null, + processingMedia: data, + }); + try { + let body; + const headers = { 'X-Gemini-Key': keys.gemini }; + if (data.type === 'url') { + headers['Content-Type'] = 'application/json'; + body = JSON.stringify({ url: data.payload, acknowledged: !!data.acknowledged }); + } else { + const formData = new FormData(); + formData.append('file', data.payload); + formData.append('acknowledged', data.acknowledged ? 'true' : 'false'); + body = formData; + } + const res = await fetch(getApiUrl('/api/process'), { + method: 'POST', + headers: data.type === 'url' ? headers : { 'X-Gemini-Key': keys.gemini }, + body, + }); + if (!res.ok) throw new Error(await res.text()); + const resData = await res.json(); + updateJob({ jobId: resData.job_id }); + } catch (e) { + updateJob({ + status: 'error', + logs: [...(getJob().logs || []), `Error starting job: ${e.message}`], + }); + } + }; + + return ( + <div className="h-full flex flex-col"> + {/* Reset / new project button row */} + {status !== 'idle' && ( + <div className="px-6 pt-4 flex items-center gap-3"> + <button + onClick={resetJob} + className="flex items-center gap-2 text-[13px] text-zinc-400 hover:text-white transition-colors" + > + <PlusCircle size={14} /> + New project + </button> + <span className={`text-[11px] px-2 py-0.5 rounded-full border ${ + status === 'processing' ? 
'bg-primary/10 border-primary/30 text-primary' + : status === 'complete' ? 'bg-success/10 border-success/30 text-success' + : status === 'error' ? 'bg-red-500/10 border-red-500/30 text-red-400' + : 'bg-white/5 border-border text-zinc-500' + }`}> + {String(status).toUpperCase()} + </span> + </div> + )} + + {/* Missing keys banner */} + {(!keys.gemini || !keys.uploadPost) && ( + <div className="mx-6 mt-3 p-3 bg-amber-500/10 border border-amber-500/30 rounded-lg flex items-center justify-between gap-4 shrink-0"> + <div className="flex items-center gap-3 text-[13px] text-amber-200"> + <KeyRound size={14} className="shrink-0 text-amber-400" /> + <div> + <span className="font-semibold">API keys missing.</span>{' '} + <span className="text-amber-200/80"> + {!keys.gemini && !keys.uploadPost ? 'Set Gemini and Upload-Post keys.' + : !keys.gemini ? 'Set your Gemini API key.' + : 'Set your Upload-Post API key.'} + </span> + </div> + </div> + <a + href="#/settings" + className="shrink-0 text-[11px] font-medium px-3 py-1.5 rounded-md bg-amber-500 hover:bg-amber-400 text-black transition-colors" + > + Go to Settings + </a> + </div> + )} + + {sessionRecovered && ( + <div className="mx-6 mt-2 p-3 bg-primary/10 border border-primary/30 rounded-lg flex items-center justify-between shrink-0"> + <div className="flex items-center gap-2 text-[13px] text-primary"> + <RotateCcw size={14} /> + <span className="font-medium">Session recovered</span> + <span className="text-zinc-400 text-[11px]">Your previous work has been restored.</span> + </div> + <button onClick={() => updateJob({ sessionRecovered: false })} className="text-zinc-500 hover:text-white transition-colors"> + <X size={14} /> + </button> + </div> + )} + + <div className="flex-1 overflow-hidden"> + {status === 'idle' && ( + <div className="h-full flex flex-col items-center justify-center p-6"> + <div className="max-w-xl w-full text-center space-y-8"> + <div className="space-y-3"> + <h1 className="text-3xl md:text-4xl font-bold 
text-white"> + Create Viral Shorts + </h1> + <p className="text-zinc-400 text-[14px]"> + Drop your long-form video below to generate viral clips with AI. + </p> + </div> + <MediaInput onProcess={handleProcess} isProcessing={status === 'processing'} /> + <div className="flex items-center justify-center gap-6 text-zinc-500 text-[12px]"> + <span className="flex items-center gap-2"><Youtube size={14} /> YouTube</span> + <span className="flex items-center gap-2"><Instagram size={14} /> Instagram</span> + <span className="flex items-center gap-2"><TikTokIcon size={14} /> TikTok</span> + </div> + </div> + </div> + )} + + {(status === 'processing' || status === 'complete' || status === 'error') && ( + <div className="h-full flex flex-col md:flex-row"> + <div className={`${status === 'complete' ? 'md:w-[30%] lg:w-[25%]' : 'md:w-[55%] lg:w-[60%]'} h-full flex flex-col border-r border-border bg-black/20 p-6 overflow-y-auto custom-scrollbar transition-all duration-700`}> + <div className="mb-6 flex items-center justify-between"> + <h2 className="text-[15px] font-semibold flex items-center gap-2 text-white"> + <Activity className={`text-primary ${status === 'processing' ? 'animate-pulse' : ''}`} size={18} /> + Live Analysis + </h2> + </div> + + {processingMedia && ( + <ProcessingAnimation + media={processingMedia} + isComplete={status === 'complete'} + syncedTime={syncedTime} + isSyncedPlaying={isSyncedPlaying} + syncTrigger={syncTrigger} + /> + )} + + <div className={`bg-[#0c0c0e] rounded-lg border border-border overflow-hidden flex flex-col transition-all duration-500 ${status === 'complete' ? 
'h-32 opacity-50 hover:opacity-100' : 'flex-1 min-h-[200px]'}`}> + <div className="px-4 py-2 border-b border-border flex items-center justify-between bg-white/5 shrink-0"> + <span className="text-[11px] font-mono text-zinc-400 flex items-center gap-2"> + <Terminal size={12} /> System Logs + </span> + <button onClick={() => setLogsVisible(!logsVisible)} className="text-zinc-500 hover:text-white transition-colors"> + <ChevronDown size={14} className={logsVisible ? '' : 'rotate-180'} /> + </button> + </div> + {logsVisible && ( + <div className="flex-1 p-4 overflow-y-auto font-mono text-[11px] space-y-1.5 custom-scrollbar text-zinc-400"> + {logs.map((log, i) => ( + <div key={i} className={`flex gap-2 ${log.toLowerCase().includes('error') ? 'text-red-400' : 'text-zinc-400'}`}> + <span className="text-zinc-700 shrink-0">{new Date().toLocaleTimeString()}</span> + <span>{log}</span> + </div> + ))} + {status === 'processing' && <div className="animate-pulse text-primary/70">_</div>} + </div> + )} + </div> + </div> + + <div className={`${status === 'complete' ? 
'md:w-[70%] lg:w-[75%]' : 'md:w-[45%] lg:w-[40%]'} h-full flex flex-col bg-background p-6 transition-all duration-700`}> + <h2 className="text-[15px] font-semibold mb-6 flex items-center gap-2 shrink-0 text-white"> + <Sparkles className="text-yellow-400" size={18} /> + Generated Shorts + {results?.clips?.length > 0 && ( + <span className="text-[11px] bg-white/10 text-white px-2 py-0.5 rounded-full ml-2"> + {results.clips.length} clips + </span> + )} + {results?.cost_analysis && ( + <span className="text-[11px] bg-success/10 border border-success/30 text-success px-2 py-0.5 rounded-full ml-2"> + ${results.cost_analysis.total_cost.toFixed(5)} + </span> + )} + {results?.clips?.length > 1 && status === 'complete' && ( + <button + onClick={() => setShowScheduleWeek(true)} + className="ml-auto flex items-center gap-1.5 px-3 py-1.5 bg-primary/10 hover:bg-primary/20 border border-primary/30 text-primary rounded-md text-[11px] font-medium transition-colors" + > + <Calendar size={12} /> + Schedule week + </button> + )} + </h2> + + <div className="flex-1 overflow-y-auto custom-scrollbar p-1"> + {results?.clips?.length > 0 ? ( + <div className={`grid gap-4 pb-10 ${status === 'complete' ? 'grid-cols-1 xl:grid-cols-2' : 'grid-cols-1'}`}> + {results.clips.map((clip, i) => ( + <ResultCard + key={i} + clip={clip} + index={i} + jobId={jobId} + uploadPostKey={keys.uploadPost} + uploadUserId={keys.uploadUserId} + geminiApiKey={keys.gemini} + elevenLabsKey={keys.elevenLabs} + onPlay={(t) => triggerSyncedPlay(t)} + onPause={triggerSyncedPause} + /> + ))} + </div> + ) : status === 'processing' ? ( + <div className="h-full flex flex-col items-center justify-center text-zinc-500 space-y-3 opacity-60"> + <div className="w-10 h-10 rounded-full border-2 border-zinc-800 border-t-primary animate-spin" /> + <p className="text-[13px]">Waiting for clips...</p> + </div> + ) : status === 'error' ? 
( + <div className="h-full flex flex-col items-center justify-center text-red-400"> + <p>Generation failed.</p> + </div> + ) : null} + </div> + </div> + </div> + )} + </div> + + {/* Missing-keys modal */} + {showKeyModal && ( + <div className="fixed inset-0 z-50 flex items-center justify-center bg-black/60 backdrop-blur-sm" onClick={() => setShowKeyModal(false)}> + <div className="bg-surface border border-border rounded-xl p-6 max-w-md w-full mx-4 space-y-4 shadow-2xl" onClick={(e) => e.stopPropagation()}> + <h2 className="text-[15px] font-semibold text-white">API keys required</h2> + <p className="text-[13px] text-zinc-400"> + Gemini and Upload-Post keys are required to generate clips. Both have free tiers. + </p> + <div className="flex gap-2 text-[11px] text-zinc-300"> + <AlertTriangle size={14} className="text-amber-400 shrink-0" /> + <span>Set them in Settings → API Keys.</span> + </div> + <div className="flex gap-3"> + <button onClick={() => setShowKeyModal(false)} className="flex-1 text-[13px] text-zinc-400 py-2 rounded-md border border-border hover:bg-white/5">Cancel</button> + <a href="#/settings" onClick={() => setShowKeyModal(false)} className="flex-1 text-[13px] text-white py-2 rounded-md bg-primary hover:bg-primary/90 text-center font-medium">Go to Settings</a> + </div> + </div> + </div> + )} + + <ScheduleWeekModal + isOpen={showScheduleWeek} + onClose={() => setShowScheduleWeek(false)} + clips={results?.clips || []} + jobId={jobId} + uploadPostKey={keys.uploadPost} + uploadUserId={keys.uploadUserId} + /> + </div> + ); +} diff --git a/frontend/src/pages/Dashboard.jsx b/frontend/src/pages/Dashboard.jsx new file mode 100644 index 00000000..19faa43c --- /dev/null +++ b/frontend/src/pages/Dashboard.jsx @@ -0,0 +1,14 @@ +import PageStub from './PageStub.jsx'; + +export default function Dashboard() { + return ( + <PageStub + title="Dashboard" + description="At-a-glance view of your content pipeline: clips processed, scheduled uploads, and published videos." 
+ todo={[ + 'Phase 4: 3 stat cards (clips processed / scheduled / published)', + 'Phase 4: scheduled-uploads list with platform badges', + ]} + /> + ); +} diff --git a/frontend/src/pages/Legacy/AIAgent.jsx b/frontend/src/pages/Legacy/AIAgent.jsx new file mode 100644 index 00000000..d5de3a4e --- /dev/null +++ b/frontend/src/pages/Legacy/AIAgent.jsx @@ -0,0 +1,94 @@ +// Legacy AI Agent informational page — pulled verbatim from the old +// App.jsx 'ai-agent' tab body. Describes the autonomous clipping skill. + +import { + Bot, Check, CheckCircle2, Copy, ExternalLink, Smartphone, Upload, Users, +} from 'lucide-react'; + +export default function LegacyAIAgent() { + return ( + <div className="p-6 md:p-10 max-w-4xl mx-auto space-y-8"> + <div className="space-y-3"> + <div className="inline-flex items-center gap-2 px-3 py-1 rounded-full bg-success/10 border border-success/30 text-[11px] uppercase tracking-wider text-success font-semibold"> + <Bot size={12} /> Autonomous Skill + </div> + <h1 className="text-3xl md:text-4xl font-bold text-white"> + Your Personal Clipping Team + </h1> + <p className="text-zinc-400 text-base md:text-lg leading-relaxed max-w-2xl"> + Drop your videos in a folder and a team of AI clippers picks the viral moments, edits them, and queues them for your approval — like having a 24/7 short-form editing crew on autopilot. + </p> + </div> + + <div className="p-4 rounded-lg border border-amber-500/30 bg-amber-500/10 flex items-start gap-3"> + <Smartphone size={20} className="text-amber-400 shrink-0 mt-0.5" /> + <div className="text-sm text-amber-100"> + <p className="font-semibold text-amber-300 mb-1">Upload videos already in vertical (9:16) mobile format.</p> + <p className="text-amber-100/80 leading-relaxed"> + The agent does not reframe horizontal footage. Make sure every source video is shot or pre-cropped to mobile/portrait format before dropping it into the input folder. 
+ </p> + </div> + </div> + + <div className="grid md:grid-cols-3 gap-4"> + {[ + { icon: Upload, title: '1. Drop your videos', text: 'Put your long-form vertical footage in the watched folder. The skill picks one video per run.' }, + { icon: Users, title: '2. AI clippers work', text: 'Whisper transcribes, Gemini spots viral beats, FFmpeg cuts each clip and adds a hook overlay.' }, + { icon: CheckCircle2, title: '3. You validate, it ships', text: 'Approve candidates and the skill auto-publishes to TikTok, Reels and YouTube Shorts via Upload-Post.' }, + ].map(({ icon: Icon, title, text }) => ( + <div key={title} className="rounded-xl border border-border bg-surface p-5 space-y-2"> + <div className="w-10 h-10 rounded-lg bg-success/10 text-success flex items-center justify-center"> + <Icon size={18} /> + </div> + <h3 className="font-semibold text-white">{title}</h3> + <p className="text-xs text-zinc-400 leading-relaxed">{text}</p> + </div> + ))} + </div> + + <div className="rounded-xl border border-border bg-surface p-6 md:p-8 space-y-5"> + <div className="flex items-start justify-between gap-4 flex-wrap"> + <div> + <h2 className="text-xl font-bold text-white mb-1">skill-autoshorts</h2> + <p className="text-sm text-zinc-400"> + The Claude Code skill that powers this workflow. Install it once and trigger it whenever you want a fresh batch of clips. 
+ </p> + </div> + <a + href="https://github.com/mutonby/skill-autoshorts" + target="_blank" + rel="noopener noreferrer" + className="btn-primary py-2 px-4 text-sm flex items-center gap-2 shrink-0" + > + View on GitHub <ExternalLink size={14} /> + </a> + </div> + + <div className="bg-[#0c0c0e] border border-border rounded-lg p-4 font-mono text-xs text-zinc-300 flex items-center justify-between gap-3"> + <span className="truncate">git clone https://github.com/mutonby/skill-autoshorts</span> + <button + onClick={() => navigator.clipboard.writeText('git clone https://github.com/mutonby/skill-autoshorts')} + className="text-zinc-500 hover:text-white transition-colors shrink-0" + title="Copy" + > + <Copy size={14} /> + </button> + </div> + + <div className="grid sm:grid-cols-2 gap-3 text-sm"> + {[ + 'Daily batch — picks one long video per run', + 'Whisper transcription with word-level timing', + 'Gemini multimodal moment detection', + 'Auto-publish to TikTok, Reels & YouTube Shorts', + ].map((t) => ( + <div key={t} className="flex items-start gap-2 text-zinc-300"> + <Check size={16} className="text-success shrink-0 mt-0.5" /> + <span>{t}</span> + </div> + ))} + </div> + </div> + </div> + ); +} diff --git a/frontend/src/pages/Legacy/SaaSShorts.jsx b/frontend/src/pages/Legacy/SaaSShorts.jsx new file mode 100644 index 00000000..539fe539 --- /dev/null +++ b/frontend/src/pages/Legacy/SaaSShorts.jsx @@ -0,0 +1,15 @@ +import SaaShortsTab from '../../components/SaaShortsTab'; +import { useKeys } from '../../state/keysStore.js'; + +export default function LegacySaaSShorts() { + const keys = useKeys(); + return ( + <SaaShortsTab + geminiApiKey={keys.gemini} + elevenLabsKey={keys.elevenLabs} + falKey={keys.fal} + uploadPostKey={keys.uploadPost} + uploadUserId={keys.uploadUserId} + /> + ); +} diff --git a/frontend/src/pages/Legacy/Thumbnails.jsx b/frontend/src/pages/Legacy/Thumbnails.jsx new file mode 100644 index 00000000..0ade0c25 --- /dev/null +++ 
b/frontend/src/pages/Legacy/Thumbnails.jsx @@ -0,0 +1,13 @@ +import ThumbnailStudio from '../../components/ThumbnailStudio'; +import { useKeys } from '../../state/keysStore.js'; + +export default function LegacyThumbnails() { + const keys = useKeys(); + return ( + <ThumbnailStudio + geminiApiKey={keys.gemini} + uploadPostKey={keys.uploadPost} + uploadUserId={keys.uploadUserId} + /> + ); +} diff --git a/frontend/src/pages/Legacy/UGCGalleryPage.jsx b/frontend/src/pages/Legacy/UGCGalleryPage.jsx new file mode 100644 index 00000000..b9589d61 --- /dev/null +++ b/frontend/src/pages/Legacy/UGCGalleryPage.jsx @@ -0,0 +1,5 @@ +import UGCGallery from '../../components/UGCGallery'; + +export default function LegacyUGCGallery() { + return <UGCGallery />; +} diff --git a/frontend/src/pages/LongForm.jsx b/frontend/src/pages/LongForm.jsx new file mode 100644 index 00000000..5cabc024 --- /dev/null +++ b/frontend/src/pages/LongForm.jsx @@ -0,0 +1,15 @@ +import PageStub from './PageStub.jsx'; + +export default function LongForm() { + return ( + <PageStub + title="Long-form" + description="Process a single long-form video end-to-end: color grade, subtitles, chapter detection, and segment-to-short exports." + todo={[ + 'Phase 4: 4-step wizard (Upload → Settings → Processing → Editor)', + 'Phase 4: chapter timeline scrubber + inline chapter rename', + 'Phase 4: subtitle panel + Export segment as short', + ]} + /> + ); +} diff --git a/frontend/src/pages/PageStub.jsx b/frontend/src/pages/PageStub.jsx new file mode 100644 index 00000000..4fa42f34 --- /dev/null +++ b/frontend/src/pages/PageStub.jsx @@ -0,0 +1,21 @@ +// Placeholder body used by every page that hasn't been built yet. +// Phase 1 ships these so the sidebar renders and navigation works; +// later phases swap each stub for the real implementation. 
+ +export default function PageStub({ title, description, todo }) { + return ( + <div className="p-8 max-w-3xl"> + <div className="text-[10px] uppercase tracking-[0.12em] text-zinc-500 mb-2">Phase 1 placeholder</div> + <h2 className="text-[20px] font-semibold text-white mb-3">{title}</h2> + {description && <p className="text-[13px] text-zinc-400 leading-relaxed mb-6">{description}</p>} + {todo && todo.length > 0 && ( + <div className="rounded-lg border border-border bg-surface p-5"> + <div className="text-[11px] uppercase tracking-wider text-zinc-500 mb-3">Up next</div> + <ul className="space-y-2 text-[12px] text-zinc-300"> + {todo.map((item, i) => <li key={i} className="flex gap-2"><span className="text-zinc-600">·</span><span>{item}</span></li>)} + </ul> + </div> + )} + </div> + ); +} diff --git a/frontend/src/pages/Settings.jsx b/frontend/src/pages/Settings.jsx new file mode 100644 index 00000000..c1f932e8 --- /dev/null +++ b/frontend/src/pages/Settings.jsx @@ -0,0 +1,162 @@ +// Settings — Phase 1 wraps the existing settings panels (Gemini key, +// Brand Kit, Upload-Post, ElevenLabs, fal.ai) under a single scrollable +// page so configuration keeps working through the restructure. +// Phase 2 rebuilds this with a VS Code-style left nav + per-section +// content panel. 
+ +import { useEffect, useState } from 'react'; +import { Shield } from 'lucide-react'; +import KeyInput from '../components/KeyInput'; +import BrandKit from '../components/BrandKit'; +import { fetchUploadProfiles, setKey, useKeys } from '../state/keysStore.js'; + +export default function Settings() { + const keys = useKeys(); + const [profiles, setProfiles] = useState([]); + const [connectStatus, setConnectStatus] = useState('idle'); // idle | loading | error + + useEffect(() => { + if (keys.uploadPost && profiles.length === 0) { + handleFetchProfiles(); + } + }, [keys.uploadPost]); + + async function handleFetchProfiles() { + if (!keys.uploadPost) return; + setConnectStatus('loading'); + try { + const data = await fetchUploadProfiles(keys.uploadPost); + if (data.profiles?.length) { + setProfiles(data.profiles); + if (!keys.uploadUserId) setKey('uploadUser', data.profiles[0].username); + setConnectStatus('idle'); + } else { + setConnectStatus('error'); + } + } catch { + setConnectStatus('error'); + } + } + + return ( + <div className="p-8 max-w-5xl mx-auto space-y-8"> + <div className="flex items-center justify-between"> + <h1 className="text-[20px] font-semibold text-white">Settings</h1> + <div className="px-3 py-1 bg-success/10 border border-success/30 rounded-full text-[10px] text-success font-medium flex items-center gap-2"> + <Shield size={12} /> Keys live only in your browser + </div> + </div> + + <div className="rounded-xl border border-border bg-surface p-1"> + <KeyInput onKeySet={(v) => setKey('gemini', v)} savedKey={keys.gemini} /> + </div> + + <BrandKit /> + + <SettingsPanel + title="Social Integration" + badge="Required" + badgeTone="amber" + description="Required to publish your clips to TikTok, Instagram Reels, and YouTube Shorts via Upload-Post. Includes a free tier." 
+ > + <div className="space-y-3"> + <label className="block text-[13px] text-zinc-400">Upload-Post API Key</label> + <div className="flex gap-2"> + <input + type="password" + value={keys.uploadPost} + onChange={(e) => setKey('uploadPost', e.target.value)} + className="input-field" + placeholder="ey..." + /> + <button onClick={handleFetchProfiles} className="btn-primary py-2 px-4 text-sm"> + Connect + </button> + </div> + {connectStatus === 'error' && ( + <p className="text-[12px] text-red-400">No profiles found. Check your key.</p> + )} + {profiles.length > 0 && ( + <div className="text-[12px] text-zinc-400"> + Connected as <span className="text-white font-medium">{profiles.find(p => p.username === keys.uploadUserId)?.username || profiles[0].username}</span> + {profiles.length > 1 && ( + <select + value={keys.uploadUserId || profiles[0].username} + onChange={(e) => setKey('uploadUser', e.target.value)} + className="ml-3 bg-surface border border-border rounded-md px-2 py-1 text-[12px]" + > + {profiles.map((p) => <option key={p.username} value={p.username}>{p.username}</option>)} + </select> + )} + </div> + )} + </div> + </SettingsPanel> + + <SettingsPanel + title="Video Translation" + badge="Optional" + description="Translate your clips to different languages using ElevenLabs AI dubbing." + > + <div className="space-y-3"> + <label className="block text-[13px] text-zinc-400">ElevenLabs API Key</label> + <div className="flex gap-2"> + <input + type="password" + value={keys.elevenLabs} + onChange={(e) => setKey('elevenLabs', e.target.value)} + className="input-field" + placeholder="sk_..." + /> + </div> + </div> + </SettingsPanel> + + <SettingsPanel + title="AI Shorts (fal.ai)" + badge="Optional" + description="Used by the legacy SaaS UGC generator. Generates AI actors and b-roll." 
+ > + <div className="space-y-3"> + <label className="block text-[13px] text-zinc-400">fal.ai API Key</label> + <div className="flex gap-2"> + <input + type="password" + value={keys.fal} + onChange={(e) => setKey('fal', e.target.value)} + className="input-field" + placeholder="fal_..." + /> + </div> + </div> + </SettingsPanel> + + <div className="rounded-lg border border-border bg-surface p-5"> + <div className="text-[11px] uppercase tracking-wider text-zinc-500 mb-2">Phase 2</div> + <p className="text-[13px] text-zinc-400"> + This page will be rebuilt with a VS Code-style left nav (General / Platforms / System) and per-section content. The Brand Kit will move under <span className="text-zinc-200">General</span>. + </p> + </div> + </div> + ); +} + +function SettingsPanel({ title, badge, badgeTone, description, children }) { + const toneClass = badgeTone === 'amber' + ? 'bg-amber-500/10 border-amber-500/30 text-amber-400' + : 'bg-white/5 border-border text-zinc-500'; + return ( + <div className="rounded-xl border border-border bg-surface p-6"> + <div className="flex items-center justify-between mb-3"> + <h2 className="text-[15px] font-semibold text-white">{title}</h2> + {badge && ( + <span className={`text-[10px] px-2 py-0.5 rounded uppercase tracking-wider border ${toneClass}`}> + {badge} + </span> + )} + </div> + {description && <p className="text-[12px] text-zinc-500 mb-5 leading-relaxed">{description}</p>} + {children} + </div> + ); +} diff --git a/frontend/src/pages/ShortForm.jsx b/frontend/src/pages/ShortForm.jsx new file mode 100644 index 00000000..40a28b84 --- /dev/null +++ b/frontend/src/pages/ShortForm.jsx @@ -0,0 +1,16 @@ +import PageStub from './PageStub.jsx'; + +export default function ShortForm() { + return ( + <PageStub + title="Short-form" + description="Upload up to 5 videos. The wizard categorizes each clip, applies the right layout, and exports to TikTok, Reels, and Shorts." 
+ todo={[ + 'Phase 3: 4-step wizard (Upload → Categorize → Processing → Review)', + 'Phase 3: per-clip progress + Snake mini-game during processing', + 'Phase 3: phone-shaped preview + Before/After toggle + export bar', + 'History tab listing past batches', + ]} + /> + ); +} diff --git a/frontend/src/state/jobStore.js b/frontend/src/state/jobStore.js new file mode 100644 index 00000000..733413fe --- /dev/null +++ b/frontend/src/state/jobStore.js @@ -0,0 +1,110 @@ +// Job store. Holds the active Clip Generator job state — jobId, status, +// results, logs, the source media being processed, and the synced-playback +// timing the Result cards use to scrub the source video. +// +// Same custom-event pattern as keysStore + brandKit. Single global slot +// because at any moment there's one active job in the existing pipeline. + +import { useEffect, useState } from 'react'; + +const SESSION_KEY = 'openshorts_session'; +const SESSION_MAX_AGE = 3_600_000; // 1 hour — matches backend job retention +const EVENT = 'openshorts:job-changed'; + +const INITIAL_STATE = { + jobId: null, + status: 'idle', // idle | processing | complete | error + results: null, + logs: [], + processingMedia: null, // { type: 'url'|'file', payload, acknowledged } + syncedTime: 0, + isSyncedPlaying: false, + syncTrigger: 0, + sessionRecovered: false, +}; + +let _state = { ...INITIAL_STATE }; + +function emit() { + window.dispatchEvent(new CustomEvent(EVENT, { detail: _state })); +} + +export function getJob() { + return _state; +} + +export function updateJob(patch) { + _state = { ..._state, ...patch }; + emit(); + // Persist non-trivial state to localStorage so reloads can recover. + if (_state.status === 'idle') { + localStorage.removeItem(SESSION_KEY); + return; + } + try { + localStorage.setItem(SESSION_KEY, JSON.stringify({ + jobId: _state.jobId, + status: _state.status, + results: _state.results, + processingMedia: _state.processingMedia?.type === 'url' ? 
_state.processingMedia : null, + timestamp: Date.now(), + })); + } catch { + // localStorage full — ignore + } +} + +export function resetJob() { + _state = { ...INITIAL_STATE }; + localStorage.removeItem(SESSION_KEY); + emit(); +} + +export function recoverJob() { + try { + const saved = localStorage.getItem(SESSION_KEY); + if (!saved) return false; + const session = JSON.parse(saved); + if (Date.now() - session.timestamp > SESSION_MAX_AGE) { + localStorage.removeItem(SESSION_KEY); + return false; + } + if (!session.jobId || !session.status || session.status === 'idle') return false; + _state = { + ...INITIAL_STATE, + jobId: session.jobId, + status: session.status === 'processing' ? 'processing' : session.status, + results: session.results || null, + processingMedia: session.processingMedia || null, + sessionRecovered: true, + }; + emit(); + setTimeout(() => updateJob({ sessionRecovered: false }), 5000); + return true; + } catch { + localStorage.removeItem(SESSION_KEY); + return false; + } +} + +export function triggerSyncedPlay(startTime) { + updateJob({ + syncedTime: startTime, + isSyncedPlaying: true, + syncTrigger: _state.syncTrigger + 1, + }); +} + +export function triggerSyncedPause() { + updateJob({ isSyncedPlaying: false }); +} + +export function useJob() { + const [state, setState] = useState(() => getJob()); + useEffect(() => { + const onChange = (e) => setState(e.detail || getJob()); + window.addEventListener(EVENT, onChange); + return () => window.removeEventListener(EVENT, onChange); + }, []); + return state; +} diff --git a/frontend/src/state/keysStore.js b/frontend/src/state/keysStore.js new file mode 100644 index 00000000..176e47a1 --- /dev/null +++ b/frontend/src/state/keysStore.js @@ -0,0 +1,78 @@ +// API key store. Holds Gemini, Upload-Post, ElevenLabs, fal.ai keys + the +// selected Upload-Post user profile. 
Mirrors the brandKit.js pattern: +// localStorage-backed, broadcasts via a custom event so any page subscribed +// to `useKeys()` re-renders on change. + +import { useEffect, useState } from 'react'; +import { encrypt, decrypt } from '../lib/crypto.js'; +import { getApiUrl } from '../config.js'; + +const STORAGE = { + gemini: { key: 'gemini_key', encrypted: false }, + uploadPost: { key: 'uploadPostKey_v3', encrypted: true }, + elevenLabs: { key: 'elevenLabsKey_v1', encrypted: true }, + fal: { key: 'falKey_v1', encrypted: true }, + uploadUser: { key: 'uploadUserId', encrypted: false }, +}; + +const EVENT = 'openshorts:keys-changed'; + +function readOne(spec) { + const raw = localStorage.getItem(spec.key); + if (!raw) return ''; + return spec.encrypted ? decrypt(raw) : raw; +} + +export function loadKeys() { + return { + gemini: readOne(STORAGE.gemini), + uploadPost: readOne(STORAGE.uploadPost), + elevenLabs: readOne(STORAGE.elevenLabs), + fal: readOne(STORAGE.fal), + uploadUserId: readOne(STORAGE.uploadUser), + }; +} + +export function setKey(name, value) { + const spec = STORAGE[name]; + if (!spec) throw new Error(`Unknown key: ${name}`); + if (!value) { + localStorage.removeItem(spec.key); + } else { + localStorage.setItem(spec.key, spec.encrypted ? 
encrypt(value) : value); + } + window.dispatchEvent(new CustomEvent(EVENT, { detail: loadKeys() })); +} + +export function setUploadUserId(value) { + setKey('uploadUser', value); +} + +export function useKeys() { + const [keys, setKeys] = useState(() => loadKeys()); + useEffect(() => { + const onChange = (e) => setKeys(e.detail || loadKeys()); + const onStorage = (e) => { + if (Object.values(STORAGE).some(s => s.key === e.key)) setKeys(loadKeys()); + }; + window.addEventListener(EVENT, onChange); + window.addEventListener('storage', onStorage); + return () => { + window.removeEventListener(EVENT, onChange); + window.removeEventListener('storage', onStorage); + }; + }, []); + return keys; +} + +// Fetch Upload-Post profiles for the current API key. Returns +// { profiles: [...] } or throws. Stored separately from keys because +// profile list is server-side state, not credential state. +export async function fetchUploadProfiles(uploadPostKey) { + if (!uploadPostKey) throw new Error('No Upload-Post key'); + const res = await fetch(getApiUrl('/api/social/user'), { + headers: { 'X-Upload-Post-Key': uploadPostKey }, + }); + if (!res.ok) throw new Error('Failed to fetch profiles'); + return res.json(); +} diff --git a/frontend/tailwind.config.js b/frontend/tailwind.config.js index 32e9bcdf..17130fad 100644 --- a/frontend/tailwind.config.js +++ b/frontend/tailwind.config.js @@ -7,10 +7,20 @@ export default { theme: { extend: { colors: { - background: "#09090b", - surface: "#18181b", - primary: "#3b82f6", - accent: "#8b5cf6", + background: "#0c0c0c", + sidebar: "#111111", + surface: "#141414", + border: "#1e1e1e", + primary: "#5b5ef4", + accent: "#5b5ef4", + success: "#34d470", + platform: { + youtube: "#f87171", + tiktok: "#a5a8fd", + instagram: "#f0abfc", + snapchat: "#facc15", + facebook: "#1877f2", + }, }, animation: { 'pulse-slow': 'pulse 3s cubic-bezier(0.4, 0, 0.6, 1) infinite', From 337b509ecdd096f17d8d5f169dbd4de33d16f49f Mon Sep 17 00:00:00 2001 From: 
vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Tue, 19 May 2026 21:29:46 -0400 Subject: [PATCH 22/43] =?UTF-8?q?feat(ui):=20phase=202=20=E2=80=94=20Setti?= =?UTF-8?q?ngs=20VS-Code=20layout=20+=20notifications=20+=20tooltips?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rebuild Settings with a 180px left nav grouped into General / Platforms / System. Each item is its own route under /settings/*: - General: Brand Kit (live), subtitle style / color presets / export defaults (placeholders documenting future controls). - Platforms: shared PlatformSection driven by :platform route param, one panel per YouTube/TikTok/Instagram/Snapchat/Facebook. - System: API Keys (Gemini + Upload-Post + ElevenLabs + fal.ai with the same connect/profile flow as before), Processing history (placeholder). Notification system: - state/notificationsStore.js — localStorage-backed feed with pushNotification / markRead / clearNotifications + useNotifications(). - components/ui/NotificationBell.jsx — Header dropdown with unread badge, platform-colored dots, "Mark all read" + "Clear all". - ResultCard + ScheduleWeekModal now push a 'submitted' (or 'scheduled' / 'failed') notification per platform on /api/social/post. UI primitives: - components/ui/Tooltip.jsx — CSS-only group-hover label, no deps. - components/ui/InfoIcon.jsx — small lucide Info wrapped in Tooltip, used next to API key panels. Backend gap (plan TODO #9) still open: /api/social/post stays synchronous, so 'submitted' is the terminal client-side status until a publish_jobs queue + GET /api/social/publish/status/{id} exists. 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- frontend/src/App.jsx | 22 ++- frontend/src/components/ResultCard.jsx | 26 +++ frontend/src/components/ScheduleWeekModal.jsx | 19 ++ frontend/src/components/ui/InfoIcon.jsx | 19 ++ .../src/components/ui/NotificationBell.jsx | 138 +++++++++++++++ frontend/src/components/ui/Tooltip.jsx | 26 +++ frontend/src/layouts/Header.jsx | 16 +- frontend/src/pages/Settings.jsx | 162 ------------------ frontend/src/pages/Settings/index.jsx | 78 +++++++++ .../Settings/sections/ApiKeysSection.jsx | 147 ++++++++++++++++ .../Settings/sections/BrandKitSection.jsx | 14 ++ .../Settings/sections/ColorPresetsSection.jsx | 15 ++ .../sections/ExportDefaultsSection.jsx | 16 ++ .../Settings/sections/HistorySection.jsx | 16 ++ .../Settings/sections/PlaceholderSection.jsx | 27 +++ .../Settings/sections/PlatformSection.jsx | 44 +++++ .../pages/Settings/sections/SectionHeader.jsx | 19 ++ .../sections/SubtitleStyleSection.jsx | 15 ++ frontend/src/state/notificationsStore.js | 109 ++++++++++++ 19 files changed, 750 insertions(+), 178 deletions(-) create mode 100644 frontend/src/components/ui/InfoIcon.jsx create mode 100644 frontend/src/components/ui/NotificationBell.jsx create mode 100644 frontend/src/components/ui/Tooltip.jsx delete mode 100644 frontend/src/pages/Settings.jsx create mode 100644 frontend/src/pages/Settings/index.jsx create mode 100644 frontend/src/pages/Settings/sections/ApiKeysSection.jsx create mode 100644 frontend/src/pages/Settings/sections/BrandKitSection.jsx create mode 100644 frontend/src/pages/Settings/sections/ColorPresetsSection.jsx create mode 100644 frontend/src/pages/Settings/sections/ExportDefaultsSection.jsx create mode 100644 frontend/src/pages/Settings/sections/HistorySection.jsx create mode 100644 frontend/src/pages/Settings/sections/PlaceholderSection.jsx create mode 100644 frontend/src/pages/Settings/sections/PlatformSection.jsx create mode 100644 
frontend/src/pages/Settings/sections/SectionHeader.jsx create mode 100644 frontend/src/pages/Settings/sections/SubtitleStyleSection.jsx create mode 100644 frontend/src/state/notificationsStore.js diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index 89cdfab4..29fd1f19 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -4,7 +4,14 @@ import Dashboard from './pages/Dashboard.jsx'; import ShortForm from './pages/ShortForm.jsx'; import LongForm from './pages/LongForm.jsx'; import ClipGenerator from './pages/ClipGenerator.jsx'; -import Settings from './pages/Settings.jsx'; +import SettingsLayout from './pages/Settings/index.jsx'; +import BrandKitSection from './pages/Settings/sections/BrandKitSection.jsx'; +import SubtitleStyleSection from './pages/Settings/sections/SubtitleStyleSection.jsx'; +import ColorPresetsSection from './pages/Settings/sections/ColorPresetsSection.jsx'; +import ExportDefaultsSection from './pages/Settings/sections/ExportDefaultsSection.jsx'; +import PlatformSection from './pages/Settings/sections/PlatformSection.jsx'; +import ApiKeysSection from './pages/Settings/sections/ApiKeysSection.jsx'; +import HistorySection from './pages/Settings/sections/HistorySection.jsx'; import LegacySaaSShorts from './pages/Legacy/SaaSShorts.jsx'; import LegacyThumbnails from './pages/Legacy/Thumbnails.jsx'; import LegacyUGCGallery from './pages/Legacy/UGCGalleryPage.jsx'; @@ -19,7 +26,18 @@ export default function App() { <Route path="short-form/*" element={<ShortForm />} /> <Route path="long-form/*" element={<LongForm />} /> <Route path="clip-generator" element={<ClipGenerator />} /> - <Route path="settings/*" element={<Settings />} /> + + <Route path="settings" element={<SettingsLayout />}> + <Route index element={<Navigate to="general/brand-kit" replace />} /> + <Route path="general/brand-kit" element={<BrandKitSection />} /> + <Route path="general/subtitle-style" element={<SubtitleStyleSection />} /> + <Route path="general/color-presets" 
element={<ColorPresetsSection />} /> + <Route path="general/export-defaults" element={<ExportDefaultsSection />} /> + <Route path="platforms/:platform" element={<PlatformSection />} /> + <Route path="system/api-keys" element={<ApiKeysSection />} /> + <Route path="system/history" element={<HistorySection />} /> + </Route> + <Route path="legacy/saasshorts" element={<LegacySaaSShorts />} /> <Route path="legacy/thumbnails" element={<LegacyThumbnails />} /> <Route path="legacy/ugc" element={<LegacyUGCGallery />} /> diff --git a/frontend/src/components/ResultCard.jsx b/frontend/src/components/ResultCard.jsx index 24bc0427..573981f8 100644 --- a/frontend/src/components/ResultCard.jsx +++ b/frontend/src/components/ResultCard.jsx @@ -5,6 +5,7 @@ import SubtitleModal from './SubtitleModal'; import HookModal from './HookModal'; import TranslateModal from './TranslateModal'; import { renderInBrowser } from '../lib/renderInBrowser'; +import { pushNotification } from '../state/notificationsStore'; export default function ResultCard({ clip, index, jobId, uploadPostKey, uploadUserId, geminiApiKey, elevenLabsKey, onPlay, onPause }) { const [showModal, setShowModal] = useState(false); @@ -372,6 +373,20 @@ export default function ResultCard({ clip, index, jobId, uploadPostKey, uploadUs } setPostResult({ success: true, msg: isScheduling ? "Scheduled successfully!" : "Posted successfully!" }); + // Push one notification per selected platform so the bell groups by + // platform. Status remains 'submitted' / 'scheduled' until a future + // backend push channel can confirm actual delivery (plan TODO #9). + selectedPlatforms.forEach((platform) => { + pushNotification({ + type: 'publish', + platform, + status: isScheduling ? 'scheduled' : 'submitted', + jobId, + message: isScheduling + ? 
`Clip ${index + 1} scheduled on ${platform}` + : `Clip ${index + 1} sent to ${platform}`, + }); + }); setTimeout(() => { setShowModal(false); setPostResult(null); @@ -379,6 +394,17 @@ export default function ResultCard({ clip, index, jobId, uploadPostKey, uploadUs } catch (e) { setPostResult({ success: false, msg: `Failed: ${e.message}` }); + // Surface the failure in the bell so users notice even if the modal + // has been dismissed. + selectedPlatforms.forEach((platform) => { + pushNotification({ + type: 'publish', + platform, + status: 'failed', + jobId, + message: `Clip ${index + 1} failed on ${platform}: ${e.message}`, + }); + }); } finally { setPosting(false); } diff --git a/frontend/src/components/ScheduleWeekModal.jsx b/frontend/src/components/ScheduleWeekModal.jsx index adc54ced..8d3ce129 100644 --- a/frontend/src/components/ScheduleWeekModal.jsx +++ b/frontend/src/components/ScheduleWeekModal.jsx @@ -1,6 +1,7 @@ import React, { useState, useMemo } from 'react'; import { X, Loader2, Calendar, Clock, CheckCircle, AlertCircle, Video, Instagram, Youtube, ChevronLeft, ChevronRight, Globe, ExternalLink } from 'lucide-react'; import { getApiUrl } from '../config'; +import { pushNotification } from '../state/notificationsStore'; const DAYS = ['Dom', 'Lun', 'Mar', 'Mié', 'Jue', 'Vie', 'Sáb']; const MONTHS = ['Ene', 'Feb', 'Mar', 'Abr', 'May', 'Jun', 'Jul', 'Ago', 'Sep', 'Oct', 'Nov', 'Dic']; @@ -146,8 +147,26 @@ export default function ScheduleWeekModal({ isOpen, onClose, clips, jobId, uploa } results.push({ index: i, success: true }); + selectedPlatforms.forEach((platform) => { + pushNotification({ + type: 'publish', + platform, + status: 'scheduled', + jobId, + message: `Clip ${index + 1} scheduled on ${platform}`, + }); + }); } catch (e) { results.push({ index: i, success: false, error: e.message }); + selectedPlatforms.forEach((platform) => { + pushNotification({ + type: 'publish', + platform, + status: 'failed', + jobId, + message: `Clip ${index + 1} failed 
on ${platform}: ${e.message}`, + }); + }); } setProgress({ current: i + 1, total, results: [...results] }); diff --git a/frontend/src/components/ui/InfoIcon.jsx b/frontend/src/components/ui/InfoIcon.jsx new file mode 100644 index 00000000..93533d02 --- /dev/null +++ b/frontend/src/components/ui/InfoIcon.jsx @@ -0,0 +1,19 @@ +// Tiny "?" / info icon next to a primary action. Hover reveals the +// explanation via Tooltip. Used pervasively per the spec. + +import { Info } from 'lucide-react'; +import Tooltip from './Tooltip.jsx'; + +export default function InfoIcon({ label, side = 'bottom', size = 12, className = '' }) { + return ( + <Tooltip label={label} side={side} className={className}> + <span + tabIndex={0} + className="inline-flex items-center justify-center w-4 h-4 rounded-full text-zinc-500 hover:text-zinc-300 focus:text-zinc-300 transition-colors outline-none" + aria-label={label} + > + <Info size={size} /> + </span> + </Tooltip> + ); +} diff --git a/frontend/src/components/ui/NotificationBell.jsx b/frontend/src/components/ui/NotificationBell.jsx new file mode 100644 index 00000000..8c3bfc5b --- /dev/null +++ b/frontend/src/components/ui/NotificationBell.jsx @@ -0,0 +1,138 @@ +// Header notification bell. Shows unread badge + dropdown panel listing +// the latest publish/render events. Reads from notificationsStore. 
+ +import { useEffect, useRef, useState } from 'react'; +import { Bell, Check, X } from 'lucide-react'; +import { useNotifications } from '../../state/notificationsStore.js'; + +const PLATFORM_DOT = { + youtube: 'bg-platform-youtube', + tiktok: 'bg-platform-tiktok', + instagram: 'bg-platform-instagram', + snapchat: 'bg-platform-snapchat', + facebook: 'bg-platform-facebook', +}; + +const STATUS_LABEL = { + submitted: 'Submitted', + scheduled: 'Scheduled', + published: 'Published', + failed: 'Failed', +}; + +function formatTime(ts) { + const date = new Date(ts); + const now = new Date(); + const sameDay = date.toDateString() === now.toDateString(); + return sameDay + ? date.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' }) + : date.toLocaleDateString([], { month: 'short', day: 'numeric' }); +} + +export default function NotificationBell() { + const { items, unread, markRead, markAllRead, clearNotifications } = useNotifications(); + const [open, setOpen] = useState(false); + const ref = useRef(null); + + useEffect(() => { + if (!open) return; + function handleClick(e) { + if (ref.current && !ref.current.contains(e.target)) setOpen(false); + } + window.addEventListener('mousedown', handleClick); + return () => window.removeEventListener('mousedown', handleClick); + }, [open]); + + return ( + <div className="relative" ref={ref}> + <button + type="button" + onClick={() => setOpen((v) => !v)} + className="relative w-8 h-8 flex items-center justify-center rounded-md text-zinc-400 hover:text-white hover:bg-white/5 transition-colors" + aria-label={unread > 0 ? 
`Notifications (${unread} unread)` : 'Notifications'} + > + <Bell size={16} /> + {unread > 0 && ( + <span className="absolute top-1.5 right-1.5 w-2 h-2 rounded-full bg-primary" /> + )} + </button> + + {open && ( + <div className="absolute right-0 mt-2 w-80 bg-surface border border-border rounded-lg shadow-2xl z-50 overflow-hidden"> + <div className="flex items-center justify-between px-4 py-2 border-b border-border bg-black/30"> + <span className="text-[12px] font-medium text-white">Notifications</span> + <div className="flex items-center gap-2"> + {unread > 0 && ( + <button + onClick={markAllRead} + className="text-[10px] uppercase tracking-wider text-zinc-400 hover:text-white" + > + Mark all read + </button> + )} + <button + onClick={() => setOpen(false)} + className="text-zinc-500 hover:text-white" + aria-label="Close" + > + <X size={14} /> + </button> + </div> + </div> + + <div className="max-h-96 overflow-y-auto custom-scrollbar"> + {items.length === 0 ? ( + <div className="px-4 py-8 text-center text-[12px] text-zinc-500"> + No notifications yet. + </div> + ) : ( + items.map((n) => ( + <button + key={n.id} + type="button" + onClick={() => markRead(n.id)} + className={`w-full px-4 py-3 border-b border-border last:border-0 flex items-start gap-3 text-left transition-colors ${ + n.read ? 'hover:bg-white/5' : 'bg-primary/[0.04] hover:bg-primary/[0.08]' + }`} + > + <span + className={`mt-1.5 w-1.5 h-1.5 rounded-full shrink-0 ${ + n.platform ? 
PLATFORM_DOT[n.platform] || 'bg-zinc-500' : 'bg-zinc-500' + }`} + /> + <div className="flex-1 min-w-0"> + <div className="flex items-center gap-2 text-[12px] text-white"> + <span className="font-medium truncate">{n.message || 'Event'}</span> + {n.status === 'failed' && ( + <span className="text-[10px] uppercase tracking-wider text-red-400">{n.status}</span> + )} + </div> + <div className="flex items-center gap-2 mt-0.5"> + {n.platform && ( + <span className="text-[10px] uppercase tracking-wider text-zinc-500">{n.platform}</span> + )} + <span className="text-[10px] text-zinc-600">{STATUS_LABEL[n.status] || n.status}</span> + <span className="text-[10px] text-zinc-700 ml-auto">{formatTime(n.ts)}</span> + </div> + </div> + {!n.read && <Check size={12} className="text-primary shrink-0 mt-1" />} + </button> + )) + )} + </div> + + {items.length > 0 && ( + <div className="px-4 py-2 border-t border-border bg-black/30 flex justify-end"> + <button + onClick={clearNotifications} + className="text-[10px] uppercase tracking-wider text-zinc-500 hover:text-zinc-300" + > + Clear all + </button> + </div> + )} + </div> + )} + </div> + ); +} diff --git a/frontend/src/components/ui/Tooltip.jsx b/frontend/src/components/ui/Tooltip.jsx new file mode 100644 index 00000000..5fa610d6 --- /dev/null +++ b/frontend/src/components/ui/Tooltip.jsx @@ -0,0 +1,26 @@ +// CSS-only tooltip. Wraps an arbitrary child element and shows a label +// on hover/focus. Uses Tailwind's group-hover pattern — no portal, +// no positioning library, no animation deps. 
+ +export default function Tooltip({ label, side = 'bottom', children, className = '' }) { + if (!label) return children; + + const sideClass = { + top: 'bottom-full left-1/2 -translate-x-1/2 mb-1.5', + bottom: 'top-full left-1/2 -translate-x-1/2 mt-1.5', + left: 'right-full top-1/2 -translate-y-1/2 mr-1.5', + right: 'left-full top-1/2 -translate-y-1/2 ml-1.5', + }[side] || ''; + + return ( + <span className={`relative inline-flex group ${className}`}> + {children} + <span + role="tooltip" + className={`pointer-events-none absolute z-50 whitespace-nowrap rounded-md bg-[#0a0a0a] border border-border px-2 py-1 text-[11px] text-zinc-200 shadow-lg opacity-0 group-hover:opacity-100 group-focus-within:opacity-100 transition-opacity ${sideClass}`} + > + {label} + </span> + </span> + ); +} diff --git a/frontend/src/layouts/Header.jsx b/frontend/src/layouts/Header.jsx index 614058f2..f55ca8e0 100644 --- a/frontend/src/layouts/Header.jsx +++ b/frontend/src/layouts/Header.jsx @@ -1,5 +1,5 @@ import { useLocation, matchPath } from 'react-router-dom'; -import { Bell } from 'lucide-react'; +import NotificationBell from '../components/ui/NotificationBell.jsx'; const TITLE_RULES = [ { pattern: '/dashboard', title: 'Dashboard' }, @@ -29,23 +29,11 @@ export default function Header() { const location = useLocation(); const title = resolveTitle(location.pathname); - // Phase 2 wires real notifications; for now show the bell with no badge. 
- const unread = 0; - return ( <header className="h-[50px] shrink-0 bg-background border-b border-border flex items-center justify-between px-6"> <h1 className="text-[14px] font-medium text-white tracking-tight">{title}</h1> <div className="flex items-center gap-2"> - <button - type="button" - className="relative w-8 h-8 flex items-center justify-center rounded-md text-zinc-400 hover:text-white hover:bg-white/5 transition-colors" - aria-label="Notifications" - > - <Bell size={16} /> - {unread > 0 && ( - <span className="absolute top-1.5 right-1.5 w-2 h-2 rounded-full bg-primary" /> - )} - </button> + <NotificationBell /> </div> </header> ); diff --git a/frontend/src/pages/Settings.jsx b/frontend/src/pages/Settings.jsx deleted file mode 100644 index c1f932e8..00000000 --- a/frontend/src/pages/Settings.jsx +++ /dev/null @@ -1,162 +0,0 @@ -// Settings — Phase 1 wraps the existing settings panels (Gemini key, -// Brand Kit, Upload-Post, ElevenLabs, fal.ai) under a single scrollable -// page so configuration keeps working through the restructure. -// Phase 2 rebuilds this with a VS Code-style left nav + per-section -// content panel. 
- -import { useEffect, useState } from 'react'; -import { Shield } from 'lucide-react'; -import KeyInput from '../components/KeyInput'; -import BrandKit from '../components/BrandKit'; -import { fetchUploadProfiles, setKey, useKeys } from '../state/keysStore.js'; - -export default function Settings() { - const keys = useKeys(); - const [profiles, setProfiles] = useState([]); - const [connectStatus, setConnectStatus] = useState('idle'); // idle | loading | error - - useEffect(() => { - if (keys.uploadPost && profiles.length === 0) { - handleFetchProfiles(); - } - }, [keys.uploadPost]); - - async function handleFetchProfiles() { - if (!keys.uploadPost) return; - setConnectStatus('loading'); - try { - const data = await fetchUploadProfiles(keys.uploadPost); - if (data.profiles?.length) { - setProfiles(data.profiles); - if (!keys.uploadUserId) setKey('uploadUser', data.profiles[0].username); - setConnectStatus('idle'); - } else { - setConnectStatus('error'); - } - } catch { - setConnectStatus('error'); - } - } - - return ( - <div className="p-8 max-w-5xl mx-auto space-y-8"> - <div className="flex items-center justify-between"> - <h1 className="text-[20px] font-semibold text-white">Settings</h1> - <div className="px-3 py-1 bg-success/10 border border-success/30 rounded-full text-[10px] text-success font-medium flex items-center gap-2"> - <Shield size={12} /> Keys live only in your browser - </div> - </div> - - <div className="rounded-xl border border-border bg-surface p-1"> - <KeyInput onKeySet={(v) => setKey('gemini', v)} savedKey={keys.gemini} /> - </div> - - <BrandKit /> - - <SettingsPanel - title="Social Integration" - badge="Required" - badgeTone="amber" - description="Required to publish your clips to TikTok, Instagram Reels, and YouTube Shorts via Upload-Post. Includes a free tier." 
- > - <div className="space-y-3"> - <label className="block text-[13px] text-zinc-400">Upload-Post API Key</label> - <div className="flex gap-2"> - <input - type="password" - value={keys.uploadPost} - onChange={(e) => setKey('uploadPost', e.target.value)} - className="input-field" - placeholder="ey..." - /> - <button onClick={handleFetchProfiles} className="btn-primary py-2 px-4 text-sm"> - Connect - </button> - </div> - {connectStatus === 'error' && ( - <p className="text-[12px] text-red-400">No profiles found. Check your key.</p> - )} - {profiles.length > 0 && ( - <div className="text-[12px] text-zinc-400"> - Connected as <span className="text-white font-medium">{profiles.find(p => p.username === keys.uploadUserId)?.username || profiles[0].username}</span> - {profiles.length > 1 && ( - <select - value={keys.uploadUserId || profiles[0].username} - onChange={(e) => setKey('uploadUser', e.target.value)} - className="ml-3 bg-surface border border-border rounded-md px-2 py-1 text-[12px]" - > - {profiles.map((p) => <option key={p.username} value={p.username}>{p.username}</option>)} - </select> - )} - </div> - )} - </div> - </SettingsPanel> - - <SettingsPanel - title="Video Translation" - badge="Optional" - description="Translate your clips to different languages using ElevenLabs AI dubbing." - > - <div className="space-y-3"> - <label className="block text-[13px] text-zinc-400">ElevenLabs API Key</label> - <div className="flex gap-2"> - <input - type="password" - value={keys.elevenLabs} - onChange={(e) => setKey('elevenLabs', e.target.value)} - className="input-field" - placeholder="sk_..." - /> - </div> - </div> - </SettingsPanel> - - <SettingsPanel - title="AI Shorts (fal.ai)" - badge="Optional" - description="Used by the legacy SaaS UGC generator. Generates AI actors and b-roll." 
- > - <div className="space-y-3"> - <label className="block text-[13px] text-zinc-400">fal.ai API Key</label> - <div className="flex gap-2"> - <input - type="password" - value={keys.fal} - onChange={(e) => setKey('fal', e.target.value)} - className="input-field" - placeholder="fal_..." - /> - </div> - </div> - </SettingsPanel> - - <div className="rounded-lg border border-border bg-surface p-5"> - <div className="text-[11px] uppercase tracking-wider text-zinc-500 mb-2">Phase 2</div> - <p className="text-[13px] text-zinc-400"> - This page will be rebuilt with a VS Code-style left nav (General / Platforms / System) and per-section content. The Brand Kit will move under <span className="text-zinc-200">General</span>. - </p> - </div> - </div> - ); -} - -function SettingsPanel({ title, badge, badgeTone, description, children }) { - const toneClass = badgeTone === 'amber' - ? 'bg-amber-500/10 border-amber-500/30 text-amber-400' - : 'bg-white/5 border-border text-zinc-500'; - return ( - <div className="rounded-xl border border-border bg-surface p-6"> - <div className="flex items-center justify-between mb-3"> - <h2 className="text-[15px] font-semibold text-white">{title}</h2> - {badge && ( - <span className={`text-[10px] px-2 py-0.5 rounded uppercase tracking-wider border ${toneClass}`}> - {badge} - </span> - )} - </div> - {description && <p className="text-[12px] text-zinc-500 mb-5 leading-relaxed">{description}</p>} - {children} - </div> - ); -} diff --git a/frontend/src/pages/Settings/index.jsx b/frontend/src/pages/Settings/index.jsx new file mode 100644 index 00000000..8e66f64d --- /dev/null +++ b/frontend/src/pages/Settings/index.jsx @@ -0,0 +1,78 @@ +// Settings layout — VS Code style. 180px left nav grouped into +// General / Platforms / System; clicking a nav item loads the +// corresponding panel into <Outlet />. 
+ +import { NavLink, Outlet } from 'react-router-dom'; +import { Shield } from 'lucide-react'; + +const SECTIONS = [ + { + label: 'General', + items: [ + { to: 'general/brand-kit', label: 'Brand Kit' }, + { to: 'general/subtitle-style', label: 'Subtitle style' }, + { to: 'general/color-presets', label: 'Color presets' }, + { to: 'general/export-defaults', label: 'Export defaults' }, + ], + }, + { + label: 'Platforms', + items: [ + { to: 'platforms/youtube', label: 'YouTube' }, + { to: 'platforms/tiktok', label: 'TikTok' }, + { to: 'platforms/instagram', label: 'Instagram' }, + { to: 'platforms/snapchat', label: 'Snapchat' }, + { to: 'platforms/facebook', label: 'Facebook' }, + ], + }, + { + label: 'System', + items: [ + { to: 'system/api-keys', label: 'API Keys' }, + { to: 'system/history', label: 'Processing history' }, + ], + }, +]; + +export default function SettingsLayout() { + return ( + <div className="flex h-full"> + <aside className="w-[180px] shrink-0 border-r border-border bg-surface/40 overflow-y-auto custom-scrollbar"> + <div className="px-4 py-4 border-b border-border"> + <div className="flex items-center gap-2 text-[10px] uppercase tracking-wider text-success"> + <Shield size={11} /> Keys live in browser + </div> + </div> + <nav className="py-2"> + {SECTIONS.map((section) => ( + <div key={section.label} className="py-2"> + <div className="px-4 py-1 text-[10px] uppercase tracking-[0.12em] text-zinc-500"> + {section.label} + </div> + {section.items.map((item) => ( + <NavLink + key={item.to} + to={item.to} + className={({ isActive }) => + `block px-4 py-1.5 text-[12px] transition-colors ${ + isActive + ? 
'bg-primary/10 text-primary border-l-2 border-primary' + : 'text-zinc-400 hover:text-white hover:bg-white/[0.03] border-l-2 border-transparent' + }` + } + > + {item.label} + </NavLink> + ))} + </div> + ))} + </nav> + </aside> + <div className="flex-1 overflow-y-auto custom-scrollbar"> + <div className="max-w-3xl p-8"> + <Outlet /> + </div> + </div> + </div> + ); +} diff --git a/frontend/src/pages/Settings/sections/ApiKeysSection.jsx b/frontend/src/pages/Settings/sections/ApiKeysSection.jsx new file mode 100644 index 00000000..fb2c43df --- /dev/null +++ b/frontend/src/pages/Settings/sections/ApiKeysSection.jsx @@ -0,0 +1,147 @@ +import { useEffect, useState } from 'react'; +import KeyInput from '../../../components/KeyInput'; +import SectionHeader from './SectionHeader.jsx'; +import InfoIcon from '../../../components/ui/InfoIcon.jsx'; +import { fetchUploadProfiles, setKey, useKeys } from '../../../state/keysStore.js'; + +export default function ApiKeysSection() { + const keys = useKeys(); + const [profiles, setProfiles] = useState([]); + const [connectStatus, setConnectStatus] = useState('idle'); // idle | loading | error + + useEffect(() => { + if (keys.uploadPost && profiles.length === 0) { + handleFetchProfiles(); + } + }, [keys.uploadPost]); + + async function handleFetchProfiles() { + if (!keys.uploadPost) return; + setConnectStatus('loading'); + try { + const data = await fetchUploadProfiles(keys.uploadPost); + if (data.profiles?.length) { + setProfiles(data.profiles); + if (!keys.uploadUserId) setKey('uploadUser', data.profiles[0].username); + setConnectStatus('idle'); + } else { + setConnectStatus('error'); + } + } catch { + setConnectStatus('error'); + } + } + + return ( + <div className="space-y-6"> + <SectionHeader + title="API Keys" + description="All keys are encrypted in localStorage and sent per-request only to the OpenShorts backend. They are never stored server-side." 
+ /> + + <Panel + title="Gemini" + badge="Required" + badgeTone="amber" + info="Used for viral-moment detection, effect prompts, title generation, descriptions, and thumbnails." + description="Google's Gemini 2.5 Flash — the only required key. Free tier covers personal use." + > + <KeyInput onKeySet={(v) => setKey('gemini', v)} savedKey={keys.gemini} /> + </Panel> + + <Panel + title="Upload-Post" + badge="Required for publishing" + badgeTone="amber" + info="Required to publish clips to TikTok, Instagram Reels, YouTube Shorts, Snapchat, and Facebook. Includes a free tier." + > + <div className="space-y-3"> + <label className="block text-[12px] text-zinc-400">API Key</label> + <div className="flex gap-2"> + <input + type="password" + value={keys.uploadPost} + onChange={(e) => setKey('uploadPost', e.target.value)} + className="input-field" + placeholder="ey..." + /> + <button + onClick={handleFetchProfiles} + disabled={!keys.uploadPost || connectStatus === 'loading'} + className="btn-primary py-2 px-4 text-sm disabled:opacity-50" + > + {connectStatus === 'loading' ? 'Connecting...' : 'Connect'} + </button> + </div> + {connectStatus === 'error' && ( + <p className="text-[12px] text-red-400">No profiles found. Check the key and try again.</p> + )} + {profiles.length > 0 && ( + <div className="flex items-center gap-3 text-[12px] text-zinc-400 pt-1"> + <span>Profile:</span> + <select + value={keys.uploadUserId || profiles[0].username} + onChange={(e) => setKey('uploadUser', e.target.value)} + className="bg-surface border border-border rounded-md px-2 py-1 text-[12px] text-white" + > + {profiles.map((p) => <option key={p.username} value={p.username}>{p.username}</option>)} + </select> + </div> + )} + </div> + </Panel> + + <Panel + title="ElevenLabs" + badge="Optional" + info="Powers AI voice dubbing across 30+ languages on a per-clip basis." + description="Translate clips into other languages while preserving the speaker's voice." 
+ > + <input + type="password" + value={keys.elevenLabs} + onChange={(e) => setKey('elevenLabs', e.target.value)} + className="input-field" + placeholder="sk_..." + /> + </Panel> + + <Panel + title="fal.ai" + badge="Optional" + info="Used by the legacy SaaS UGC pipeline (Flux Pro + Kling) to generate AI actors and B-roll." + > + <input + type="password" + value={keys.fal} + onChange={(e) => setKey('fal', e.target.value)} + className="input-field" + placeholder="fal_..." + /> + </Panel> + </div> + ); +} + +function Panel({ title, badge, badgeTone, info, description, children }) { + const toneClass = badgeTone === 'amber' + ? 'bg-amber-500/10 border-amber-500/30 text-amber-400' + : 'bg-white/5 border-border text-zinc-500'; + return ( + <div className="rounded-xl border border-border bg-surface p-6"> + <div className="flex items-center justify-between mb-2"> + <div className="flex items-center gap-2"> + <h2 className="text-[14px] font-semibold text-white">{title}</h2> + {info && <InfoIcon label={info} side="right" />} + </div> + {badge && ( + <span className={`text-[10px] px-2 py-0.5 rounded uppercase tracking-wider border ${toneClass}`}> + {badge} + </span> + )} + </div> + {description && <p className="text-[12px] text-zinc-500 mb-4 leading-relaxed">{description}</p>} + {children} + </div> + ); +} diff --git a/frontend/src/pages/Settings/sections/BrandKitSection.jsx b/frontend/src/pages/Settings/sections/BrandKitSection.jsx new file mode 100644 index 00000000..d3d52b34 --- /dev/null +++ b/frontend/src/pages/Settings/sections/BrandKitSection.jsx @@ -0,0 +1,14 @@ +import BrandKit from '../../../components/BrandKit'; +import SectionHeader from './SectionHeader.jsx'; + +export default function BrandKitSection() { + return ( + <div> + <SectionHeader + title="Brand Kit" + description="Colors, font, and per-aspect-ratio text positioning that every subtitle, hook, and overlay inherits." 
+ /> + <BrandKit /> + </div> + ); +} diff --git a/frontend/src/pages/Settings/sections/ColorPresetsSection.jsx b/frontend/src/pages/Settings/sections/ColorPresetsSection.jsx new file mode 100644 index 00000000..dd1e1f34 --- /dev/null +++ b/frontend/src/pages/Settings/sections/ColorPresetsSection.jsx @@ -0,0 +1,15 @@ +import PlaceholderSection from './PlaceholderSection.jsx'; + +export default function ColorPresetsSection() { + return ( + <PlaceholderSection + title="Color presets" + description="Cinematic LUTs and color-grade defaults applied during the short-form and long-form auto-edit. Needs the backend LUT integration before it can ship (plan TODO #5)." + todo={[ + 'Upload .cube / .3dl LUT files', + 'Choose a default LUT per workflow (short / long)', + 'Per-platform overrides', + ]} + /> + ); +} diff --git a/frontend/src/pages/Settings/sections/ExportDefaultsSection.jsx b/frontend/src/pages/Settings/sections/ExportDefaultsSection.jsx new file mode 100644 index 00000000..5fabf024 --- /dev/null +++ b/frontend/src/pages/Settings/sections/ExportDefaultsSection.jsx @@ -0,0 +1,16 @@ +import PlaceholderSection from './PlaceholderSection.jsx'; + +export default function ExportDefaultsSection() { + return ( + <PlaceholderSection + title="Export defaults" + description="Output settings used when you download or publish a clip — container, codec, bitrate, max duration. Per-platform overrides live under Platforms." + todo={[ + 'Default container (MP4 / MOV)', + 'Codec + bitrate target', + 'Auto-resize policy (crop vs. 
letterbox)', + 'Filename template', + ]} + /> + ); +} diff --git a/frontend/src/pages/Settings/sections/HistorySection.jsx b/frontend/src/pages/Settings/sections/HistorySection.jsx new file mode 100644 index 00000000..bb428c8a --- /dev/null +++ b/frontend/src/pages/Settings/sections/HistorySection.jsx @@ -0,0 +1,16 @@ +import PlaceholderSection from './PlaceholderSection.jsx'; + +export default function HistorySection() { + return ( + <PlaceholderSection + title="Processing history" + description="Past clip-generator jobs, short-form batches, and long-form renders. Re-download outputs or re-run a job with new settings." + todo={[ + 'Searchable list of past jobs', + 'Per-job re-download links', + 'Re-edit a clip in a new wizard run', + 'Backend index endpoint (plan TODO #10)', + ]} + /> + ); +} diff --git a/frontend/src/pages/Settings/sections/PlaceholderSection.jsx b/frontend/src/pages/Settings/sections/PlaceholderSection.jsx new file mode 100644 index 00000000..119f64f8 --- /dev/null +++ b/frontend/src/pages/Settings/sections/PlaceholderSection.jsx @@ -0,0 +1,27 @@ +import SectionHeader from './SectionHeader.jsx'; + +// Reusable placeholder for settings sections that haven't been built yet +// (subtitle style, color presets, export defaults, processing history, +// per-platform overrides). Phase 1/2 ships these as named drop targets; +// later phases fill them in. 
+ +export default function PlaceholderSection({ title, description, todo, badge = 'Coming soon' }) { + return ( + <div> + <SectionHeader title={title} description={description} badge={badge} /> + {todo && todo.length > 0 && ( + <div className="rounded-lg border border-border bg-surface p-5 space-y-3"> + <div className="text-[10px] uppercase tracking-wider text-zinc-500">Planned controls</div> + <ul className="space-y-2 text-[13px] text-zinc-300"> + {todo.map((t, i) => ( + <li key={i} className="flex gap-2"> + <span className="text-zinc-600">·</span> + <span>{t}</span> + </li> + ))} + </ul> + </div> + )} + </div> + ); +} diff --git a/frontend/src/pages/Settings/sections/PlatformSection.jsx b/frontend/src/pages/Settings/sections/PlatformSection.jsx new file mode 100644 index 00000000..cfb7b7e8 --- /dev/null +++ b/frontend/src/pages/Settings/sections/PlatformSection.jsx @@ -0,0 +1,44 @@ +// One panel per social platform. Phase 2 stubs the override forms; the +// real wiring lands once the bell + scheduling backend gaps close. 
+ +import { useParams } from 'react-router-dom'; +import PlaceholderSection from './PlaceholderSection.jsx'; + +const PLATFORMS = { + youtube: { label: 'YouTube', dotClass: 'bg-platform-youtube' }, + tiktok: { label: 'TikTok', dotClass: 'bg-platform-tiktok' }, + instagram: { label: 'Instagram', dotClass: 'bg-platform-instagram' }, + snapchat: { label: 'Snapchat', dotClass: 'bg-platform-snapchat' }, + facebook: { label: 'Facebook', dotClass: 'bg-platform-facebook' }, +}; + +export default function PlatformSection() { + const { platform = 'youtube' } = useParams(); + const meta = PLATFORMS[platform] || PLATFORMS.youtube; + + return ( + <div> + <div className="flex items-center gap-3 mb-6"> + <span className={`w-2.5 h-2.5 rounded-full ${meta.dotClass}`} /> + <h1 className="text-[18px] font-semibold text-white">{meta.label}</h1> + <span className="text-[10px] px-2 py-0.5 rounded uppercase tracking-wider border bg-white/5 border-border text-zinc-400"> + Per-platform overrides + </span> + </div> + <p className="text-[13px] text-zinc-500 leading-relaxed max-w-2xl mb-6"> + Override global subtitle style, color grade, export format, and scheduling defaults for {meta.label}. These take precedence over the General settings whenever a clip is published to this platform. + </p> + <PlaceholderSection + title="Overrides" + description="Each platform will expose: caption position, subtitle font, color grade, export codec, scheduling cadence." + badge="Coming soon" + todo={[ + 'Subtitle style override (per-platform safe-zone)', + 'Color grade override', + 'Export codec / container override', + 'Default scheduling cadence', + ]} + /> + </div> + ); +} diff --git a/frontend/src/pages/Settings/sections/SectionHeader.jsx b/frontend/src/pages/Settings/sections/SectionHeader.jsx new file mode 100644 index 00000000..ac8e5bc8 --- /dev/null +++ b/frontend/src/pages/Settings/sections/SectionHeader.jsx @@ -0,0 +1,19 @@ +// Shared header for every settings section: title + optional badge + optional description. 
+ +export default function SectionHeader({ title, description, badge }) { + return ( + <div className="mb-6"> + <div className="flex items-center gap-3 mb-1"> + <h1 className="text-[18px] font-semibold text-white">{title}</h1> + {badge && ( + <span className="text-[10px] px-2 py-0.5 rounded uppercase tracking-wider border bg-white/5 border-border text-zinc-400"> + {badge} + </span> + )} + </div> + {description && ( + <p className="text-[13px] text-zinc-500 leading-relaxed max-w-2xl">{description}</p> + )} + </div> + ); +} diff --git a/frontend/src/pages/Settings/sections/SubtitleStyleSection.jsx b/frontend/src/pages/Settings/sections/SubtitleStyleSection.jsx new file mode 100644 index 00000000..864523ae --- /dev/null +++ b/frontend/src/pages/Settings/sections/SubtitleStyleSection.jsx @@ -0,0 +1,15 @@ +import PlaceholderSection from './PlaceholderSection.jsx'; + +export default function SubtitleStyleSection() { + return ( + <PlaceholderSection + title="Subtitle style" + description="Defaults applied to auto-burn-in subtitles. Most controls live in the Brand Kit today; this panel will host extras (animation, dropshadow, casing rules, line-length thresholds)." + todo={[ + 'Word-by-word reveal speed', + 'Per-language font fallback chain', + 'Default burn-in vs. soft-sub on download', + ]} + /> + ); +} diff --git a/frontend/src/state/notificationsStore.js b/frontend/src/state/notificationsStore.js new file mode 100644 index 00000000..cf900ef0 --- /dev/null +++ b/frontend/src/state/notificationsStore.js @@ -0,0 +1,109 @@ +// Notification feed for the header bell. Frontend-only — backend has no +// push channel yet, so publish/render events are pushed here at the +// moment of each call. Persists to localStorage so the bell survives +// reloads. See plan TODO #9 for the missing backend status endpoint. 
import { useEffect, useState } from 'react';

const STORAGE_KEY = 'openshorts.notifications.v1';
const MAX_ITEMS = 50;
const EVENT = 'openshorts:notifications-changed';

/**
 * Read the persisted feed from localStorage.
 *
 * Returns [] when nothing is stored, the payload is not a JSON array, or
 * storage is unavailable. Entries that are not objects are dropped so that a
 * corrupted payload cannot make readers such as `unreadCount` throw on
 * `n.read`, and the list is capped at MAX_ITEMS like every in-memory write.
 */
function load() {
  try {
    const raw = localStorage.getItem(STORAGE_KEY);
    if (!raw) return [];
    const parsed = JSON.parse(raw);
    if (!Array.isArray(parsed)) return [];
    return parsed
      .filter((n) => n !== null && typeof n === 'object')
      .slice(0, MAX_ITEMS);
  } catch {
    return [];
  }
}

// Module-level feed, newest first. Replaced (never mutated in place) on change.
let _items = load();

/** Best-effort write of the feed to localStorage. */
function persist() {
  try {
    localStorage.setItem(STORAGE_KEY, JSON.stringify(_items));
  } catch {
    // localStorage full — drop oldest and retry once.
    // New items are prepended, so the oldest five are the trailing five.
    _items = _items.slice(0, Math.max(0, _items.length - 5));
    try { localStorage.setItem(STORAGE_KEY, JSON.stringify(_items)); } catch {}
  }
}

/** Persist the feed, then broadcast it to subscribers in this window. */
function emit() {
  persist();
  window.dispatchEvent(new CustomEvent(EVENT, { detail: _items }));
}

/** Return the current feed (newest first). Treat the array as read-only. */
export function listNotifications() {
  return _items;
}

/** Count entries whose `read` flag is falsy. */
export function unreadCount() {
  return _items.filter((n) => !n.read).length;
}

/**
 * Prepend a notification, trim the feed to MAX_ITEMS, persist and broadcast.
 *
 * Every field of `input` is optional; a missing or null `input` yields an
 * all-defaults entry instead of throwing. Returns the stored item.
 */
export function pushNotification(input) {
  const src = input || {};
  const item = {
    id: src.id || `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
    type: src.type || 'event', // publish | render | job | event
    platform: src.platform || null, // youtube | tiktok | instagram | snapchat | facebook
    status: src.status || 'submitted', // submitted | scheduled | published | failed
    jobId: src.jobId || null,
    publishId: src.publishId || null,
    ts: src.ts || Date.now(),
    message: src.message || '',
    read: false,
  };
  _items = [item, ..._items].slice(0, MAX_ITEMS);
  emit();
  return item;
}

/** Mark the entry with the given id as read; no broadcast if nothing changed. */
export function markRead(id) {
  let changed = false;
  _items = _items.map((n) => {
    if (n.id === id && !n.read) { changed = true; return { ...n, read: true }; }
    return n;
  });
  if (changed) emit();
}

/** Mark every entry as read; no-op when all are already read. */
export function markAllRead() {
  if (!_items.some((n) => !n.read)) return;
  _items = _items.map((n) => ({ ...n, read: true }));
  emit();
}

/** Empty the feed, persist and broadcast. */
export function clearNotifications() {
  _items = [];
  emit();
}
/**
 * React binding for the notification feed.
 *
 * Subscribes to the in-window change event and to cross-tab `storage` events,
 * and returns the current items, the unread count and the mutators.
 */
export function useNotifications() {
  const [items, setItems] = useState(_items);
  useEffect(() => {
    const onChange = (e) => setItems(e.detail || listNotifications());
    const onStorage = (e) => {
      // `key` is null when another tab called localStorage.clear(); that
      // wipes this feed too, so it must trigger a reload as well.
      if (e.key === STORAGE_KEY || e.key === null) {
        _items = load();
        setItems(_items);
      }
    };
    window.addEventListener(EVENT, onChange);
    window.addEventListener('storage', onStorage);
    // Effects run after paint: anything pushed between the initial render and
    // this subscription would otherwise be missed until the next change.
    setItems(listNotifications());
    return () => {
      window.removeEventListener(EVENT, onChange);
      window.removeEventListener('storage', onStorage);
    };
  }, []);
  return {
    items,
    unread: items.filter((n) => !n.read).length,
    markRead,
    markAllRead,
    clearNotifications,
  };
}
- Processing: parallel POST /api/process per file, per-row status, Skip enables once any clip completes, Review unlocks when every file reaches complete/error. SnakeGame fills the wait. - Review: 230px clip list (left) + phone-framed video preview with Before/After (blob URL for the original) + export bar (Download, Publish, Schedule, Send to CapCut). Publish/Schedule pushes a notification via the bell store (real /api/social/post wiring blocked on plan TODO #9). UI primitives: - `PhoneFrame` — 9:16 bezel with notch (sm/md/lg). - `SnakeGame` — self-contained 20×20 grid, arrows/WASD, space pause, auto-pauses on document.hidden. - `PlatformBadge` — color-coded chip per platform, reuses the bg-platform-* tokens. - `StatCard` — Dashboard stat panel (Phase 4 will consume it). History: - /short-form/history reads `openshorts.shortForm.history` (written by Processing on completion). Backend index endpoint is plan TODO #10. App.jsx import updated to `./pages/ShortForm/index.jsx`; the old single-file `ShortForm.jsx` is removed. Build verified: 1610 modules, 1264 KB JS chunk. 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- frontend/src/App.jsx | 2 +- frontend/src/components/ui/PhoneFrame.jsx | 19 ++ frontend/src/components/ui/PlatformBadge.jsx | 38 +++ frontend/src/components/ui/SnakeGame.jsx | 129 ++++++++++ frontend/src/components/ui/StatCard.jsx | 19 ++ frontend/src/hooks/useWizard.js | 109 +++++++++ frontend/src/pages/ShortForm.jsx | 16 -- frontend/src/pages/ShortForm/History.jsx | 59 +++++ frontend/src/pages/ShortForm/Wizard.jsx | 83 +++++++ frontend/src/pages/ShortForm/index.jsx | 52 ++++ .../src/pages/ShortForm/steps/Categorize.jsx | 140 +++++++++++ .../src/pages/ShortForm/steps/Processing.jsx | 224 ++++++++++++++++++ frontend/src/pages/ShortForm/steps/Review.jsx | 195 +++++++++++++++ frontend/src/pages/ShortForm/steps/Upload.jsx | 128 ++++++++++ 14 files changed, 1196 insertions(+), 17 deletions(-) create mode 100644 frontend/src/components/ui/PhoneFrame.jsx create mode 100644 frontend/src/components/ui/PlatformBadge.jsx create mode 100644 frontend/src/components/ui/SnakeGame.jsx create mode 100644 frontend/src/components/ui/StatCard.jsx create mode 100644 frontend/src/hooks/useWizard.js delete mode 100644 frontend/src/pages/ShortForm.jsx create mode 100644 frontend/src/pages/ShortForm/History.jsx create mode 100644 frontend/src/pages/ShortForm/Wizard.jsx create mode 100644 frontend/src/pages/ShortForm/index.jsx create mode 100644 frontend/src/pages/ShortForm/steps/Categorize.jsx create mode 100644 frontend/src/pages/ShortForm/steps/Processing.jsx create mode 100644 frontend/src/pages/ShortForm/steps/Review.jsx create mode 100644 frontend/src/pages/ShortForm/steps/Upload.jsx diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index 29fd1f19..70c65972 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -1,7 +1,7 @@ import { Navigate, Route, Routes } from 'react-router-dom'; import AppShell from './layouts/AppShell.jsx'; import Dashboard from './pages/Dashboard.jsx'; -import ShortForm from 
'./pages/ShortForm.jsx'; +import ShortForm from './pages/ShortForm/index.jsx'; import LongForm from './pages/LongForm.jsx'; import ClipGenerator from './pages/ClipGenerator.jsx'; import SettingsLayout from './pages/Settings/index.jsx'; diff --git a/frontend/src/components/ui/PhoneFrame.jsx b/frontend/src/components/ui/PhoneFrame.jsx new file mode 100644 index 00000000..7d52fcdd --- /dev/null +++ b/frontend/src/components/ui/PhoneFrame.jsx @@ -0,0 +1,19 @@ +// PhoneFrame — pure visual wrapper that gives any 9:16 content a phone-shaped +// bezel + notch. Wraps a <video>, <img>, or any preview component. + +export default function PhoneFrame({ children, className = '', size = 'md' }) { + const widths = { sm: 200, md: 260, lg: 320 }; + const w = widths[size] || widths.md; + const h = Math.round((w * 16) / 9); + return ( + <div + className={`relative bg-zinc-950 border border-zinc-800 rounded-[28px] p-2 shadow-2xl ${className}`} + style={{ width: w, height: h }} + > + <div className="absolute top-2.5 left-1/2 -translate-x-1/2 w-16 h-4 bg-black rounded-full z-10" /> + <div className="w-full h-full bg-black rounded-[22px] overflow-hidden flex items-center justify-center"> + {children} + </div> + </div> + ); +} diff --git a/frontend/src/components/ui/PlatformBadge.jsx b/frontend/src/components/ui/PlatformBadge.jsx new file mode 100644 index 00000000..8981a782 --- /dev/null +++ b/frontend/src/components/ui/PlatformBadge.jsx @@ -0,0 +1,38 @@ +// PlatformBadge — small color-coded chip for a social platform. The static +// class names are spelled out per Tailwind's safelist scan. 
+ +import { Facebook, Instagram, Youtube } from 'lucide-react'; + +const TikTokGlyph = ({ size = 12 }) => ( + <svg width={size} height={size} viewBox="0 0 24 24" fill="currentColor"> + <path d="M19.589 6.686a4.793 4.793 0 0 1-3.77-4.245V2h-3.445v13.672a2.896 2.896 0 0 1-5.201 1.743 2.895 2.895 0 0 1 3.183-4.51v-3.5a6.329 6.329 0 0 0-5.394 10.692 6.33 6.33 0 0 0 10.857-4.424V8.687a8.182 8.182 0 0 0 4.773 1.526V6.79a4.831 4.831 0 0 1-1.003-.104z" /> + </svg> +); +const SnapGlyph = ({ size = 12 }) => ( + <svg width={size} height={size} viewBox="0 0 24 24" fill="currentColor"> + <path d="M12 2c5 0 7 4 7 7 0 1 0 3-.4 4.6.4.2 1 .3 1.6.3.5 0 1.4-.1 1.4.7 0 .8-1.4 1.1-2.3 1.4-.4.1-.6.2-.6.5 0 .8 2.8 2.6 4.7 2.6.5 0 .9.4.9.9 0 1.4-2.9 1.7-3.3 2.1-.1.1-.1.3 0 .5.1.4-.1.7-.5.7-.8 0-2-.5-3.4 0-1.3.5-2.2 2.2-5 2.2s-3.7-1.7-5-2.2c-1.4-.5-2.6 0-3.4 0-.4 0-.6-.3-.5-.7.1-.2.1-.4 0-.5-.4-.4-3.3-.7-3.3-2.1 0-.5.4-.9.9-.9 1.9 0 4.7-1.8 4.7-2.6 0-.3-.2-.4-.6-.5-.9-.3-2.3-.6-2.3-1.4 0-.8.9-.7 1.4-.7.6 0 1.2-.1 1.6-.3-.4-1.6-.4-3.6-.4-4.6 0-3 2-7 7-7z" /> + </svg> +); + +const PLATFORMS = { + youtube: { label: 'YouTube', icon: Youtube, class: 'text-platform-youtube border-platform-youtube/30 bg-platform-youtube/10' }, + tiktok: { label: 'TikTok', icon: TikTokGlyph, class: 'text-platform-tiktok border-platform-tiktok/30 bg-platform-tiktok/10' }, + instagram: { label: 'Instagram', icon: Instagram, class: 'text-platform-instagram border-platform-instagram/30 bg-platform-instagram/10' }, + snapchat: { label: 'Snapchat', icon: SnapGlyph, class: 'text-platform-snapchat border-platform-snapchat/30 bg-platform-snapchat/10' }, + facebook: { label: 'Facebook', icon: Facebook, class: 'text-platform-facebook border-platform-facebook/30 bg-platform-facebook/10' }, +}; + +export default function PlatformBadge({ platform, withLabel = true, size = 'sm' }) { + const meta = PLATFORMS[platform]; + if (!meta) return null; + const Icon = meta.icon; + const padding = size === 'sm' ? 
'px-1.5 py-0.5 text-[10px]' : 'px-2 py-1 text-[11px]'; + return ( + <span className={`inline-flex items-center gap-1 rounded-md border ${padding} ${meta.class}`}> + <Icon size={size === 'sm' ? 12 : 14} /> + {withLabel && meta.label} + </span> + ); +} + +export { PLATFORMS }; diff --git a/frontend/src/components/ui/SnakeGame.jsx b/frontend/src/components/ui/SnakeGame.jsx new file mode 100644 index 00000000..945974cd --- /dev/null +++ b/frontend/src/components/ui/SnakeGame.jsx @@ -0,0 +1,129 @@ +// Self-contained Snake on a 20x20 grid to fill processing wait time. +// Pauses automatically when the tab is hidden. Arrow keys / WASD to steer, +// space to pause. + +import { useEffect, useRef, useState } from 'react'; + +const GRID = 20; +const TICK_MS = 110; + +function randCell(occupied) { + for (let i = 0; i < 200; i++) { + const c = { x: Math.floor(Math.random() * GRID), y: Math.floor(Math.random() * GRID) }; + if (!occupied.some((p) => p.x === c.x && p.y === c.y)) return c; + } + return { x: 0, y: 0 }; +} + +const INITIAL_SNAKE = [{ x: 10, y: 10 }, { x: 9, y: 10 }, { x: 8, y: 10 }]; +const INITIAL_DIR = { x: 1, y: 0 }; +const INITIAL_FOOD = { x: 14, y: 10 }; + +export default function SnakeGame({ onScore }) { + const [snake, setSnake] = useState(INITIAL_SNAKE); + const [dir, setDir] = useState(INITIAL_DIR); + const [food, setFood] = useState(INITIAL_FOOD); + const [score, setScore] = useState(0); + const [running, setRunning] = useState(true); + const [over, setOver] = useState(false); + + // Stash latest dir for the keydown closure. 
+ const dirRef = useRef(dir); + dirRef.current = dir; + + useEffect(() => { + function onKey(e) { + const k = e.key; + const cur = dirRef.current; + if ((k === 'ArrowUp' || k === 'w' || k === 'W') && cur.y !== 1) { setDir({ x: 0, y: -1 }); e.preventDefault(); } + else if ((k === 'ArrowDown' || k === 's' || k === 'S') && cur.y !== -1) { setDir({ x: 0, y: 1 }); e.preventDefault(); } + else if ((k === 'ArrowLeft' || k === 'a' || k === 'A') && cur.x !== 1) { setDir({ x: -1, y: 0 }); e.preventDefault(); } + else if ((k === 'ArrowRight' || k === 'd' || k === 'D') && cur.x !== -1) { setDir({ x: 1, y: 0 }); e.preventDefault(); } + else if (k === ' ') { setRunning((r) => !r); e.preventDefault(); } + } + window.addEventListener('keydown', onKey); + return () => window.removeEventListener('keydown', onKey); + }, []); + + useEffect(() => { + function onVis() { if (document.hidden) setRunning(false); } + document.addEventListener('visibilitychange', onVis); + return () => document.removeEventListener('visibilitychange', onVis); + }, []); + + useEffect(() => { + if (!running || over) return; + const id = setInterval(() => { + setSnake((prev) => { + const head = prev[0]; + const next = { x: head.x + dir.x, y: head.y + dir.y }; + if (next.x < 0 || next.x >= GRID || next.y < 0 || next.y >= GRID) { + setOver(true); setRunning(false); return prev; + } + if (prev.some((p) => p.x === next.x && p.y === next.y)) { + setOver(true); setRunning(false); return prev; + } + const ate = next.x === food.x && next.y === food.y; + const newSnake = [next, ...prev]; + if (!ate) newSnake.pop(); + else { + setScore((s) => { const v = s + 1; onScore?.(v); return v; }); + setFood(randCell(newSnake)); + } + return newSnake; + }); + }, TICK_MS); + return () => clearInterval(id); + }, [running, over, dir, food, onScore]); + + function reset() { + setSnake(INITIAL_SNAKE); + setDir(INITIAL_DIR); + setFood(INITIAL_FOOD); + setScore(0); + setOver(false); + setRunning(true); + } + + return ( + <div 
className="select-none"> + <div className="flex items-center justify-between mb-2 text-[11px] text-zinc-400"> + <span>Score: <span className="text-white font-mono">{score}</span></span> + <span className="text-zinc-600"> + {over ? 'Game over' : running ? 'playing' : 'paused — space to resume'} + </span> + </div> + <div + className="grid bg-black border border-border rounded-md overflow-hidden" + style={{ gridTemplateColumns: `repeat(${GRID}, 1fr)`, aspectRatio: '1 / 1' }} + > + {Array.from({ length: GRID * GRID }).map((_, i) => { + const x = i % GRID; + const y = Math.floor(i / GRID); + const isHead = snake[0].x === x && snake[0].y === y; + const isBody = !isHead && snake.some((p) => p.x === x && p.y === y); + const isFood = food.x === x && food.y === y; + return ( + <div + key={i} + className={ + isHead ? 'bg-primary' : + isBody ? 'bg-primary/60' : + isFood ? 'bg-success rounded-sm' : + 'bg-zinc-900/40' + } + /> + ); + })} + </div> + <div className="mt-3 flex items-center justify-between text-[11px] text-zinc-500"> + <span>Arrow keys / WASD · space to pause</span> + {over && ( + <button onClick={reset} className="px-2 py-1 rounded-md bg-primary/20 text-primary text-[11px] hover:bg-primary/30"> + Play again + </button> + )} + </div> + </div> + ); +} diff --git a/frontend/src/components/ui/StatCard.jsx b/frontend/src/components/ui/StatCard.jsx new file mode 100644 index 00000000..16cf1cf5 --- /dev/null +++ b/frontend/src/components/ui/StatCard.jsx @@ -0,0 +1,19 @@ +// StatCard — single-stat panel for the Dashboard. Lucide icon optional. 
// Generic wizard state machine. Holds { step, data } via useReducer with
// optional localStorage persistence keyed by `storageKey`. Step components
// stay dumb — they receive { step, data, setData, next, back, goto, reset }.
//
// Steps array:
//   [
//     { id: 'upload',     label: 'Upload' },
//     { id: 'categorize', label: 'Categorize' },
//     { id: 'processing', label: 'Processing', lock: true },
//     { id: 'review',     label: 'Review' },
//   ]
//
// `lock: true` disables BACK while on that step (used for Processing — you
// can't undo work in flight; only forward/skip or reset).

import { useEffect, useReducer, useRef } from 'react';

// Coerce any value to an integer step index within [0, maxStep].
// Non-finite input (NaN, undefined, strings) maps to 0.
function clampStep(value, maxStep) {
  const n = Number.isFinite(value) ? Math.trunc(value) : 0;
  return Math.max(0, Math.min(n, maxStep));
}

// Read and validate a persisted wizard state. Returns a full reducer state,
// or null when there is no key, nothing is stored, or the payload is unusable.
function readSaved(storageKey, initialData, maxStep) {
  if (!storageKey) return null;
  try {
    const raw = localStorage.getItem(storageKey);
    if (!raw) return null;
    const saved = JSON.parse(raw);
    if (!saved || typeof saved.step !== 'number') return null;
    const savedData =
      saved.data && typeof saved.data === 'object' && !Array.isArray(saved.data)
        ? saved.data
        : {};
    return {
      step: clampStep(saved.step, maxStep),
      data: { ...initialData, ...savedData },
      maxStep,
    };
  } catch {
    return null; // storage unavailable or corrupt JSON
  }
}

function reducer(state, action) {
  switch (action.type) {
    case 'NEXT':
      return { ...state, step: Math.min(state.step + 1, state.maxStep) };
    case 'BACK':
      return { ...state, step: Math.max(0, state.step - 1) };
    case 'GOTO':
      return { ...state, step: clampStep(action.step, state.maxStep) };
    case 'SET_DATA':
      return {
        ...state,
        // A function patch receives the previous data and returns the next;
        // an object patch is shallow-merged.
        data: typeof action.data === 'function'
          ? action.data(state.data)
          : { ...state.data, ...action.data },
      };
    case 'RESET':
      return { ...state, step: 0, data: action.initialData };
    case 'REHYDRATE':
      return action.state;
    default:
      return state;
  }
}

/**
 * Wizard state hook.
 *
 * @param {object} opts
 * @param {Array<{id: string, label: string, lock?: boolean}>} opts.steps
 * @param {object} [opts.initialData]  Data used at start and after reset().
 * @param {string|null} [opts.storageKey]  localStorage key; null disables persistence.
 * @returns step/data accessors, navigation callbacks and derived flags.
 */
export function useWizard({ steps, initialData = {}, storageKey = null }) {
  const maxStep = steps.length - 1;

  // Rehydrate synchronously in the lazy initializer. Doing it in an effect
  // lets the persist effect below write the *initial* state over the saved
  // one first; when effects run twice (React StrictMode) the second read then
  // sees that overwritten value and the saved progress is lost. File objects
  // don't survive JSON round-trips, so callers should treat persisted File
  // fields as "may be missing on reload" and recover gracefully.
  const [state, dispatch] = useReducer(
    reducer,
    undefined,
    () => readSaved(storageKey, initialData, maxStep)
      || { step: 0, data: initialData, maxStep },
  );

  // If the caller switches to a different storageKey after mount, load that
  // key's saved state. The initial key was already handled above.
  const hydratedKey = useRef(storageKey);
  useEffect(() => {
    if (hydratedKey.current === storageKey) return;
    hydratedKey.current = storageKey;
    const saved = readSaved(storageKey, initialData, maxStep);
    if (saved) dispatch({ type: 'REHYDRATE', state: saved });
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [storageKey]);

  // Persist on every change.
  useEffect(() => {
    if (!storageKey) return;
    try {
      localStorage.setItem(storageKey, JSON.stringify({
        step: state.step,
        data: state.data,
      }));
    } catch {/* localStorage full or non-serializable — ignore */}
  }, [state.step, state.data, storageKey]);

  const currentStep = steps[state.step];
  const isLocked = !!currentStep?.lock;

  return {
    step: state.step,
    currentStep,
    steps,
    data: state.data,
    setData: (patch) => dispatch({ type: 'SET_DATA', data: patch }),
    next: () => dispatch({ type: 'NEXT' }),
    back: () => { if (!isLocked) dispatch({ type: 'BACK' }); },
    goto: (s) => dispatch({ type: 'GOTO', step: s }),
    reset: () => {
      dispatch({ type: 'RESET', initialData });
      if (storageKey) {
        // Storage access can throw (e.g. blocked storage); reset must still work.
        try { localStorage.removeItem(storageKey); } catch {/* ignore */}
      }
    },
    canBack: state.step > 0 && !isLocked,
    canNext: state.step < maxStep,
    isFirst: state.step === 0,
    isLast: state.step === maxStep,
    isLocked,
  };
}
+// Backend index endpoint replaces this in a later phase. +// TODO(backend): plan TODO #10 — GET /api/clips/recent?limit=20 for the live feed. + +import { useEffect, useState } from 'react'; +import { Archive } from 'lucide-react'; + +const HISTORY_KEY = 'openshorts.shortForm.history'; + +function loadHistory() { + try { + const raw = localStorage.getItem(HISTORY_KEY); + if (!raw) return []; + const parsed = JSON.parse(raw); + return Array.isArray(parsed) ? parsed : []; + } catch { + return []; + } +} + +export default function History() { + const [items, setItems] = useState([]); + + useEffect(() => { + setItems(loadHistory()); + }, []); + + if (items.length === 0) { + return ( + <div className="h-full flex flex-col items-center justify-center text-zinc-500 p-12"> + <Archive size={36} className="text-zinc-700 mb-3" /> + <div className="text-[14px] text-white font-medium">No past batches yet</div> + <p className="text-[12px] text-zinc-500 mt-1 text-center max-w-md"> + Each completed short-form batch is saved here. Re-download outputs or re-run a clip in a new wizard pass (coming with the backend index endpoint). + </p> + </div> + ); + } + + return ( + <div className="p-6 space-y-3 overflow-y-auto custom-scrollbar h-full"> + {items.map((item) => ( + <div key={item.id} className="rounded-xl border border-border bg-surface p-4"> + <div className="flex items-center justify-between"> + <div className="min-w-0"> + <div className="text-[13px] font-medium text-white truncate">{item.title || 'Untitled batch'}</div> + <div className="text-[11px] text-zinc-500 mt-0.5"> + {new Date(item.ts).toLocaleString()} · {item.clipCount || 0} clip{item.clipCount === 1 ? 
'' : 's'} + </div> + </div> + <span className="text-[10px] uppercase tracking-wider px-2 py-0.5 rounded border border-border bg-white/5 text-zinc-500 shrink-0 ml-3"> + Saved + </span> + </div> + </div> + ))} + </div> + ); +} diff --git a/frontend/src/pages/ShortForm/Wizard.jsx b/frontend/src/pages/ShortForm/Wizard.jsx new file mode 100644 index 00000000..7b2569d2 --- /dev/null +++ b/frontend/src/pages/ShortForm/Wizard.jsx @@ -0,0 +1,83 @@ +// 4-step short-form wizard. Owns wizard state; step components stay dumb. +// Persists step + non-File data in localStorage so accidental reloads +// keep progress (File handles don't survive JSON; see useWizard.js notes). + +import { Check } from 'lucide-react'; +import { useWizard } from '../../hooks/useWizard.js'; +import Upload from './steps/Upload.jsx'; +import Categorize from './steps/Categorize.jsx'; +import Processing from './steps/Processing.jsx'; +import Review from './steps/Review.jsx'; + +const STEPS = [ + { id: 'upload', label: 'Upload' }, + { id: 'categorize', label: 'Categorize' }, + { id: 'processing', label: 'Processing', lock: true }, + { id: 'review', label: 'Review' }, +]; + +const INITIAL = { + files: [], + settings: { + colorGrade: true, + autoSubtitles: true, + silenceRemoval: false, + faceLayout: true, + }, + jobs: {}, +}; + +const STORAGE_KEY = 'openshorts.shortForm.wizard'; + +export default function Wizard() { + const w = useWizard({ steps: STEPS, initialData: INITIAL, storageKey: STORAGE_KEY }); + + return ( + <div className="h-full flex flex-col"> + <StepIndicator wizard={w} /> + <div className="flex-1 overflow-hidden"> + {w.currentStep.id === 'upload' && <Upload wizard={w} />} + {w.currentStep.id === 'categorize' && <Categorize wizard={w} />} + {w.currentStep.id === 'processing' && <Processing wizard={w} />} + {w.currentStep.id === 'review' && <Review wizard={w} />} + </div> + </div> + ); +} + +function StepIndicator({ wizard }) { + return ( + <div className="px-6 py-4 border-b border-border 
bg-background shrink-0"> + <div className="flex items-center gap-3"> + {wizard.steps.map((s, i) => { + const active = i === wizard.step; + const done = i < wizard.step; + const reachable = i <= wizard.step && !wizard.isLocked; + return ( + <div key={s.id} className="flex items-center gap-3 flex-1"> + <button + onClick={() => reachable && wizard.goto(i)} + disabled={!reachable} + className={`flex items-center gap-2 disabled:cursor-not-allowed ${ + active ? 'text-white' : done ? 'text-zinc-300' : 'text-zinc-600' + }`} + > + <span className={`w-6 h-6 flex items-center justify-center rounded-full text-[11px] font-medium ${ + active ? 'bg-primary text-white' : + done ? 'bg-success/20 text-success border border-success/40' : + 'bg-white/5 text-zinc-500 border border-border' + }`}> + {done ? <Check size={12} /> : i + 1} + </span> + <span className="text-[12px]">{s.label}</span> + </button> + {i < wizard.steps.length - 1 && ( + <div className={`flex-1 h-px ${done ? 'bg-success/40' : 'bg-border'}`} /> + )} + </div> + ); + })} + </div> + </div> + ); +} diff --git a/frontend/src/pages/ShortForm/index.jsx b/frontend/src/pages/ShortForm/index.jsx new file mode 100644 index 00000000..a232bfc7 --- /dev/null +++ b/frontend/src/pages/ShortForm/index.jsx @@ -0,0 +1,52 @@ +// Short-form workflow: 4-step wizard for vertical clip generation from up +// to 5 source videos, with a sibling History tab. The parent App.jsx +// route mounts this under /short-form/*; everything inside here is +// router-local. + +import { NavLink, Outlet, Route, Routes } from 'react-router-dom'; +import Wizard from './Wizard.jsx'; +import History from './History.jsx'; + +function Shell() { + return ( + <div className="h-full flex flex-col"> + <div className="px-6 pt-5 pb-3 border-b border-border bg-background flex items-center gap-1 shrink-0"> + <NavLink + to="/short-form" + end + className={({ isActive }) => + `text-[13px] px-3 py-1.5 rounded-md transition-colors ${ + isActive ? 
'bg-white/10 text-white' : 'text-zinc-400 hover:text-white' + }` + } + > + Wizard + </NavLink> + <NavLink + to="/short-form/history" + className={({ isActive }) => + `text-[13px] px-3 py-1.5 rounded-md transition-colors ${ + isActive ? 'bg-white/10 text-white' : 'text-zinc-400 hover:text-white' + }` + } + > + History + </NavLink> + </div> + <div className="flex-1 overflow-hidden"> + <Outlet /> + </div> + </div> + ); +} + +export default function ShortForm() { + return ( + <Routes> + <Route element={<Shell />}> + <Route index element={<Wizard />} /> + <Route path="history" element={<History />} /> + </Route> + </Routes> + ); +} diff --git a/frontend/src/pages/ShortForm/steps/Categorize.jsx b/frontend/src/pages/ShortForm/steps/Categorize.jsx new file mode 100644 index 00000000..7aeb938d --- /dev/null +++ b/frontend/src/pages/ShortForm/steps/Categorize.jsx @@ -0,0 +1,140 @@ +// Step 2: Categorize. Four category cards per uploaded clip + an auto-edit +// settings block. +// +// AI categorization is stubbed — see plan TODO #2. We pre-fill 'educational' +// for new clips and let the user override per clip. + +import { useEffect } from 'react'; +import { GraduationCap, Mic, Sparkles, Tv } from 'lucide-react'; + +const CATEGORIES = [ + { + id: 'educational', + label: 'Educational', + icon: GraduationCap, + description: 'Talking-head explainers, lectures, walk-throughs.', + }, + { + id: 'yap', + label: 'Yap', + icon: Mic, + description: 'Podcasts, casual rants, multi-speaker chats.', + }, + { + id: 'live', + label: 'Live', + icon: Tv, + description: 'Streams, gameplay reactions, IRL moments.', + }, + { + id: 'viral', + label: 'Viral', + icon: Sparkles, + description: 'Fast-cut highlights, memes, micro-moments.', + }, +]; + +const TOGGLES = [ + { id: 'colorGrade', label: 'Color grade', hint: 'Apply a cinematic LUT (backend TODO #5).' }, + { id: 'autoSubtitles', label: 'Auto subtitles', hint: 'Transcribe + burn captions with brand-kit style.' 
}, + { id: 'silenceRemoval', label: 'Silence removal', hint: 'Auto-cut dead air (backend TODO #4).' }, + { id: 'faceLayout', label: 'Face-focus layout', hint: 'Lock crop to detected speakers.' }, +]; + +export default function Categorize({ wizard }) { + const files = wizard.data.files || []; + const settings = wizard.data.settings || {}; + + // TODO(backend): plan TODO #2 — replace this pre-fill with POST /api/categorize + // on the file's transcript or thumbnail. + useEffect(() => { + const next = files.map((f) => (f.category ? f : { ...f, category: 'educational' })); + if (next.some((f, i) => f !== files[i])) { + wizard.setData({ files: next }); + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [files.length]); + + function setCategory(fileId, categoryId) { + wizard.setData({ files: files.map((f) => f.id === fileId ? { ...f, category: categoryId } : f) }); + } + + function toggle(key) { + wizard.setData({ settings: { ...settings, [key]: !settings[key] } }); + } + + return ( + <div className="h-full overflow-y-auto custom-scrollbar"> + <div className="p-6 max-w-5xl mx-auto space-y-8"> + <header> + <h1 className="text-[18px] font-semibold text-white">Categorize</h1> + <p className="text-[13px] text-zinc-500 mt-1"> + Pick a category per clip — it tunes the layout and editing style. + AI categorization lands with a backend update; defaults are pre-selected. 
+ </p> + </header> + + <section className="space-y-3"> + <h2 className="text-[12px] uppercase tracking-wider text-zinc-500">Clips</h2> + <div className="space-y-3"> + {files.map((f) => ( + <div key={f.id} className="rounded-xl border border-border bg-surface p-4"> + <div className="text-[13px] text-white font-medium mb-3 truncate">{f.name}</div> + <div className="grid grid-cols-2 md:grid-cols-4 gap-2"> + {CATEGORIES.map((c) => { + const Icon = c.icon; + const active = f.category === c.id; + return ( + <button + key={c.id} + onClick={() => setCategory(f.id, c.id)} + className={`text-left rounded-lg border p-3 transition-colors ${ + active ? 'border-primary bg-primary/10' : 'border-border hover:bg-white/5' + }`} + > + <div className={`flex items-center gap-2 text-[12px] font-medium ${active ? 'text-primary' : 'text-white'}`}> + <Icon size={14} /> + {c.label} + </div> + <p className="text-[10px] text-zinc-500 mt-1 leading-snug">{c.description}</p> + </button> + ); + })} + </div> + </div> + ))} + </div> + </section> + + <section className="rounded-xl border border-border bg-surface p-5 space-y-3"> + <h2 className="text-[12px] uppercase tracking-wider text-zinc-500">Auto-edit settings</h2> + <div className="grid grid-cols-1 md:grid-cols-2 gap-3"> + {TOGGLES.map((t) => ( + <label key={t.id} className="flex items-start gap-3 rounded-lg border border-border p-3 cursor-pointer hover:bg-white/5"> + <input + type="checkbox" + checked={!!settings[t.id]} + onChange={() => toggle(t.id)} + className="mt-1 accent-primary" + /> + <div> + <div className="text-[13px] text-white">{t.label}</div> + <div className="text-[11px] text-zinc-500 mt-0.5">{t.hint}</div> + </div> + </label> + ))} + </div> + </section> + + <div className="flex items-center justify-between pt-4 border-t border-border"> + <button onClick={wizard.back} className="text-[13px] text-zinc-400 hover:text-white transition-colors"> + ← Back + </button> + <button onClick={wizard.next} className="btn-primary px-5 py-2 
text-[13px]"> + Start processing → + </button> + </div> + </div> + </div> + ); +} diff --git a/frontend/src/pages/ShortForm/steps/Processing.jsx b/frontend/src/pages/ShortForm/steps/Processing.jsx new file mode 100644 index 00000000..44cd7213 --- /dev/null +++ b/frontend/src/pages/ShortForm/steps/Processing.jsx @@ -0,0 +1,224 @@ +// Step 3: Processing. Fires up to N parallel POST /api/process calls (the +// backend has no batch endpoint — see plan TODO #1). Each file gets its +// own progress row that mirrors the existing jobStore polling shape. +// +// User can play SnakeGame while waiting; Skip advances to Review with +// whatever clips have finished (failed jobs are skipped in Review). +// +// TODO(backend): plan TODO #1 — replace the per-file loop with POST +// /api/process/batch returning a list of job ids. + +import { useEffect, useRef, useState } from 'react'; +import { CheckCircle2, Loader2, XCircle } from 'lucide-react'; +import { getApiUrl } from '../../../config'; +import { useKeys } from '../../../state/keysStore.js'; +import SnakeGame from '../../../components/ui/SnakeGame.jsx'; + +const POLL_MS = 2000; +const HISTORY_KEY = 'openshorts.shortForm.history'; + +async function startJob({ file, geminiKey }) { + const formData = new FormData(); + formData.append('file', file); + formData.append('acknowledged', 'true'); + const res = await fetch(getApiUrl('/api/process'), { + method: 'POST', + headers: { 'X-Gemini-Key': geminiKey }, + body: formData, + }); + if (!res.ok) throw new Error(await res.text()); + return res.json(); +} + +async function fetchStatus(jobId) { + const res = await fetch(getApiUrl(`/api/status/${jobId}`)); + if (!res.ok) throw new Error(await res.text()); + return res.json(); +} + +function saveHistory(entry) { + try { + const raw = localStorage.getItem(HISTORY_KEY); + const list = raw ? 
JSON.parse(raw) : []; + list.unshift(entry); + localStorage.setItem(HISTORY_KEY, JSON.stringify(list.slice(0, 50))); + } catch {/* ignore */} +} + +export default function Processing({ wizard }) { + const keys = useKeys(); + const files = wizard.data.files || []; + const jobs = wizard.data.jobs || {}; + const startedRef = useRef(false); + const historySavedRef = useRef(false); + const [overallStatus, setOverallStatus] = useState('starting'); + + // Kick off jobs once on first mount. + useEffect(() => { + if (startedRef.current) return; + startedRef.current = true; + if (!keys.gemini) { + setOverallStatus('error'); + return; + } + setOverallStatus('running'); + files.forEach(async (f) => { + if (jobs[f.id]?.jobId) return; + // File object may not exist after a wizard rehydrate. + if (!(f.file instanceof File)) { + wizard.setData((prev) => ({ + ...prev, + jobs: { ...prev.jobs, [f.id]: { jobId: null, status: 'error', logs: ['Source file lost — re-upload to retry.'], result: null } }, + })); + return; + } + try { + const { job_id } = await startJob({ file: f.file, geminiKey: keys.gemini }); + wizard.setData((prev) => ({ + ...prev, + jobs: { ...prev.jobs, [f.id]: { jobId: job_id, status: 'processing', logs: [], result: null } }, + })); + } catch (e) { + wizard.setData((prev) => ({ + ...prev, + jobs: { ...prev.jobs, [f.id]: { jobId: null, status: 'error', logs: [String(e.message || e)], result: null } }, + })); + } + }); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + // Poll status for every still-running job. 
+ useEffect(() => { + const active = Object.entries(jobs).filter(([, j]) => j.jobId && j.status === 'processing'); + if (active.length === 0) return; + const id = setInterval(async () => { + for (const [fileId, j] of active) { + try { + const data = await fetchStatus(j.jobId); + wizard.setData((prev) => ({ + ...prev, + jobs: { + ...prev.jobs, + [fileId]: { + ...prev.jobs[fileId], + status: data.status || prev.jobs[fileId].status, + logs: data.logs || prev.jobs[fileId].logs, + result: data.results || prev.jobs[fileId].result, + }, + }, + })); + } catch (e) { + wizard.setData((prev) => ({ + ...prev, + jobs: { + ...prev.jobs, + [fileId]: { ...prev.jobs[fileId], status: 'error', logs: [...(prev.jobs[fileId].logs || []), String(e.message || e)] }, + }, + })); + } + } + }, POLL_MS); + return () => clearInterval(id); + // Re-subscribe whenever the set of active job statuses changes. + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [Object.values(jobs).map((j) => j.status).join(',')]); + + // Detect all-done + persist a history entry once. + useEffect(() => { + const entries = Object.values(jobs); + if (entries.length < files.length) return; + const done = entries.every((j) => j.status === 'complete' || j.status === 'error'); + if (!done) return; + setOverallStatus('complete'); + if (historySavedRef.current) return; + historySavedRef.current = true; + saveHistory({ + id: `${Date.now()}-${Math.random().toString(36).slice(2, 6)}`, + ts: Date.now(), + clipCount: entries.reduce((sum, j) => sum + (j.result?.clips?.length || 0), 0), + jobs: entries.map((j) => j.jobId).filter(Boolean), + title: `Batch of ${files.length} file${files.length === 1 ? 
'' : 's'}`, + }); + }, [jobs, files.length]); + + const hasAnyComplete = Object.values(jobs).some((j) => j.status === 'complete'); + + return ( + <div className="h-full overflow-y-auto custom-scrollbar"> + <div className="p-6 max-w-5xl mx-auto grid grid-cols-1 lg:grid-cols-2 gap-6"> + <section> + <h1 className="text-[18px] font-semibold text-white">Processing</h1> + <p className="text-[13px] text-zinc-500 mt-1 mb-4"> + Each file runs through the pipeline in parallel. Backend batch + (plan TODO #1) will replace these per-file calls. + </p> + + <div className="space-y-2"> + {files.map((f) => { + const j = jobs[f.id]; + const status = j?.status || 'queued'; + const lastLog = j?.logs?.[j.logs.length - 1]; + return ( + <div key={f.id} className="rounded-lg border border-border bg-surface p-3"> + <div className="flex items-center gap-3"> + <StatusIcon status={status} /> + <div className="flex-1 min-w-0"> + <div className="text-[13px] text-white truncate">{f.name}</div> + <div className="text-[11px] text-zinc-500 truncate"> + {status === 'queued' ? 'Queued…' : + status === 'complete' ? `Generated ${j.result?.clips?.length || 0} clip${(j.result?.clips?.length || 0) === 1 ? '' : 's'}` : + status === 'error' ? (lastLog || 'Failed') : + (lastLog || 'Processing…')} + </div> + </div> + {j?.jobId && <span className="text-[10px] font-mono text-zinc-600 shrink-0">{j.jobId.slice(0, 8)}</span>} + </div> + </div> + ); + })} + </div> + + <div className="flex items-center justify-between mt-6 pt-4 border-t border-border"> + <span className="text-[11px] text-zinc-500"> + {overallStatus === 'complete' ? 'All files done.' : + overallStatus === 'error' ? 'Missing Gemini key — set it in Settings.' 
: + 'You can wait, play Snake, or skip to whatever has finished.'} + </span> + <div className="flex items-center gap-3"> + <button + onClick={() => wizard.goto(3)} + disabled={!hasAnyComplete} + className="text-[13px] text-zinc-400 hover:text-white transition-colors disabled:opacity-40 disabled:cursor-not-allowed" + > + Skip → + </button> + <button + onClick={() => wizard.goto(3)} + disabled={overallStatus !== 'complete'} + className="btn-primary px-5 py-2 text-[13px] disabled:opacity-40 disabled:cursor-not-allowed" + > + Review → + </button> + </div> + </div> + </section> + + <aside className="rounded-xl border border-border bg-surface p-5"> + <div className="mb-3"> + <h2 className="text-[14px] font-semibold text-white">Pass the time</h2> + <p className="text-[12px] text-zinc-500 mt-0.5">Render times scale with clip length — keep your hands busy.</p> + </div> + <SnakeGame /> + </aside> + </div> + </div> + ); +} + +function StatusIcon({ status }) { + if (status === 'complete') return <CheckCircle2 size={16} className="text-success" />; + if (status === 'error') return <XCircle size={16} className="text-red-400" />; + if (status === 'queued') return <span className="w-2 h-2 rounded-full bg-zinc-700 inline-block mx-[3px]" />; + return <Loader2 size={16} className="text-primary animate-spin" />; +} diff --git a/frontend/src/pages/ShortForm/steps/Review.jsx b/frontend/src/pages/ShortForm/steps/Review.jsx new file mode 100644 index 00000000..21392da5 --- /dev/null +++ b/frontend/src/pages/ShortForm/steps/Review.jsx @@ -0,0 +1,195 @@ +// Step 4: Review. Split view — clip list (left) + phone preview + export bar. +// +// Export wiring: +// - Download: opens the generated clip URL (existing /api/status results). +// - Publish: pushes a notification + would call POST /api/social/post. +// Backend doesn't queue these yet (plan TODO #9), so we +// surface the intent locally via the bell. +// - Schedule: same path as Publish with status='scheduled'. 
+// - Send to CapCut: placeholder — backend integration TODO. + +import { useEffect, useMemo, useState } from 'react'; +import { Download, Eye, Scissors } from 'lucide-react'; +import PhoneFrame from '../../../components/ui/PhoneFrame.jsx'; +import PlatformBadge from '../../../components/ui/PlatformBadge.jsx'; +import { getApiUrl } from '../../../config'; +import { pushNotification } from '../../../state/notificationsStore.js'; + +const PLATFORMS = ['youtube', 'tiktok', 'instagram', 'snapchat', 'facebook']; + +function flattenClips(jobs, files) { + const out = []; + for (const f of files) { + const j = jobs[f.id]; + if (!j?.result?.clips) continue; + j.result.clips.forEach((clip, i) => { + out.push({ + jobId: j.jobId, + fileId: f.id, + sourceName: f.name, + sourceFile: f.file instanceof File ? f.file : null, + clipIndex: i, + clip, + }); + }); + } + return out; +} + +export default function Review({ wizard }) { + const files = wizard.data.files || []; + const jobs = wizard.data.jobs || {}; + const clips = useMemo(() => flattenClips(jobs, files), [jobs, files]); + const [selected, setSelected] = useState(0); + const [showOriginal, setShowOriginal] = useState(false); + const [sourceUrl, setSourceUrl] = useState(null); + + const current = clips[Math.min(selected, clips.length - 1)] || null; + const clipUrl = current?.clip?.video_url ? getApiUrl(current.clip.video_url) : null; + + // Build a blob URL for the original source file — only available when + // the wizard has the in-memory File (lost after reload). 
+ useEffect(() => { + if (!current?.sourceFile) { setSourceUrl(null); return; } + const url = URL.createObjectURL(current.sourceFile); + setSourceUrl(url); + return () => URL.revokeObjectURL(url); + }, [current?.sourceFile]); + + if (clips.length === 0) { + return ( + <div className="h-full flex flex-col items-center justify-center p-12 text-center text-zinc-500"> + <p className="text-[14px] text-white font-medium">No finished clips yet.</p> + <p className="text-[12px] mt-1">Go back to Processing and wait, or restart the wizard.</p> + <button onClick={wizard.reset} className="mt-4 btn-primary px-4 py-2 text-[13px]"> + Start over + </button> + </div> + ); + } + + function publish(platform, scheduled) { + if (!current) return; + pushNotification({ + type: 'publish', + platform, + status: scheduled ? 'scheduled' : 'submitted', + jobId: current.jobId, + message: scheduled + ? `Clip ${current.clipIndex + 1} scheduled to ${platform}` + : `Clip ${current.clipIndex + 1} sent to ${platform}`, + }); + // TODO(backend): plan TODO #9 — wire to /api/social/post once the + // publish_jobs queue + status endpoint land. + } + + const title = current?.clip?.video_title_for_youtube_short || current?.clip?.title || ''; + const description = + current?.clip?.video_description_for_instagram || + current?.clip?.video_description_for_tiktok || + current?.clip?.description || + ''; + + return ( + <div className="h-full flex"> + <aside className="w-[230px] shrink-0 border-r border-border bg-background overflow-y-auto custom-scrollbar p-3 space-y-1"> + <div className="text-[11px] uppercase tracking-wider text-zinc-500 px-2 mb-2"> + {clips.length} clip{clips.length === 1 ? 
'' : 's'} + </div> + {clips.map((c, i) => { + const active = i === selected; + const clipTitle = c.clip?.video_title_for_youtube_short || c.clip?.title; + return ( + <button + key={`${c.jobId}-${c.clipIndex}`} + onClick={() => { setSelected(i); setShowOriginal(false); }} + className={`w-full text-left rounded-lg p-2 transition-colors ${ + active ? 'bg-primary/15 border border-primary/30' : 'border border-transparent hover:bg-white/5' + }`} + > + <div className={`text-[12px] font-medium truncate ${active ? 'text-white' : 'text-zinc-300'}`}> + Clip {i + 1} + </div> + <div className="text-[10px] text-zinc-500 truncate mt-0.5">{c.sourceName}</div> + {clipTitle && ( + <div className="text-[10px] text-zinc-400 truncate mt-1 italic">"{clipTitle}"</div> + )} + </button> + ); + })} + </aside> + + <div className="flex-1 flex flex-col overflow-hidden"> + <div className="flex-1 overflow-y-auto custom-scrollbar p-6 flex flex-col items-center gap-4"> + <div className="flex items-center gap-2 text-[12px]"> + <button + onClick={() => setShowOriginal(false)} + className={`px-3 py-1.5 rounded-md ${!showOriginal ? 'bg-white/10 text-white' : 'text-zinc-400 hover:text-white'}`} + > + After + </button> + <button + onClick={() => setShowOriginal(true)} + disabled={!sourceUrl} + className={`px-3 py-1.5 rounded-md disabled:opacity-30 disabled:cursor-not-allowed ${showOriginal ? 'bg-white/10 text-white' : 'text-zinc-400 hover:text-white'}`} + > + <Eye size={12} className="inline mr-1" /> Before + </button> + </div> + + <PhoneFrame size="md"> + {showOriginal && sourceUrl ? ( + <video key={`src-${selected}`} src={sourceUrl} controls className="w-full h-full object-contain" /> + ) : clipUrl ? 
( + <video key={`clip-${selected}`} src={clipUrl} controls className="w-full h-full object-cover" /> + ) : ( + <div className="text-zinc-600 text-[12px] p-4 text-center">No preview available.</div> + )} + </PhoneFrame> + + {title && ( + <div className="text-center max-w-md"> + <div className="text-[13px] text-white font-medium">{title}</div> + {description && ( + <p className="text-[11px] text-zinc-500 mt-1 leading-snug whitespace-pre-line">{description}</p> + )} + </div> + )} + </div> + + <div className="border-t border-border bg-surface px-4 py-3 flex flex-wrap items-center gap-3 shrink-0"> + <a + href={clipUrl || '#'} + download + className={`btn-primary px-3 py-2 text-[12px] flex items-center gap-2 ${!clipUrl ? 'opacity-40 pointer-events-none' : ''}`} + > + <Download size={12} /> Download + </a> + <div className="flex items-center gap-1"> + <span className="text-[11px] text-zinc-500 mr-1">Publish:</span> + {PLATFORMS.map((p) => ( + <button key={p} onClick={() => publish(p, false)} className="hover:opacity-80 transition-opacity" title={`Publish to ${p}`}> + <PlatformBadge platform={p} withLabel={false} size="sm" /> + </button> + ))} + </div> + <div className="flex items-center gap-1"> + <span className="text-[11px] text-zinc-500 mr-1">Schedule:</span> + {PLATFORMS.map((p) => ( + <button key={p} onClick={() => publish(p, true)} className="hover:opacity-80 transition-opacity" title={`Schedule to ${p}`}> + <PlatformBadge platform={p} withLabel={false} size="sm" /> + </button> + ))} + </div> + <button + disabled + title="CapCut export — coming soon" + className="ml-auto px-3 py-2 text-[12px] flex items-center gap-2 rounded-md border border-border text-zinc-500 cursor-not-allowed" + > + <Scissors size={12} /> Send to CapCut + </button> + </div> + </div> + </div> + ); +} diff --git a/frontend/src/pages/ShortForm/steps/Upload.jsx b/frontend/src/pages/ShortForm/steps/Upload.jsx new file mode 100644 index 00000000..610ee0af --- /dev/null +++ 
b/frontend/src/pages/ShortForm/steps/Upload.jsx @@ -0,0 +1,128 @@ +// Step 1: Upload. Drag-drop + browse, up to 5 files, MP4/MOV <= 2 GB. +// Each entry: { id, file (File), name, size } + +import { useRef, useState } from 'react'; +import { FileVideo, UploadCloud, X } from 'lucide-react'; + +const MAX_FILES = 5; +const MAX_SIZE_BYTES = 2 * 1024 * 1024 * 1024; +const ALLOWED_TYPES = ['video/mp4', 'video/quicktime']; + +function nextId() { return `${Date.now()}-${Math.random().toString(36).slice(2, 7)}`; } + +function fmtSize(bytes) { + if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(0)} KB`; + if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; + return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)} GB`; +} + +export default function Upload({ wizard }) { + const inputRef = useRef(null); + const [dragOver, setDragOver] = useState(false); + const [error, setError] = useState(''); + + const files = wizard.data.files || []; + + function addFiles(list) { + setError(''); + const incoming = Array.from(list); + const accepted = []; + for (const f of incoming) { + if (files.length + accepted.length >= MAX_FILES) { + setError(`Up to ${MAX_FILES} files per batch.`); + break; + } + const okType = ALLOWED_TYPES.includes(f.type) || /\.(mp4|mov)$/i.test(f.name); + if (!okType) { setError(`${f.name}: only MP4 / MOV files.`); continue; } + if (f.size > MAX_SIZE_BYTES) { setError(`${f.name}: over 2 GB.`); continue; } + accepted.push({ id: nextId(), file: f, name: f.name, size: f.size }); + } + if (accepted.length) wizard.setData({ files: [...files, ...accepted] }); + } + + function removeFile(id) { + wizard.setData({ files: files.filter((f) => f.id !== id) }); + } + + return ( + <div className="h-full overflow-y-auto custom-scrollbar"> + <div className="p-6 max-w-3xl mx-auto space-y-6"> + <header> + <h1 className="text-[18px] font-semibold text-white">Upload videos</h1> + <p className="text-[13px] text-zinc-500 mt-1"> + Drop up to {MAX_FILES} 
source videos. MP4 or MOV, up to 2 GB each. + </p> + </header> + + <div + onDragOver={(e) => { e.preventDefault(); setDragOver(true); }} + onDragLeave={() => setDragOver(false)} + onDrop={(e) => { + e.preventDefault(); + setDragOver(false); + if (e.dataTransfer.files?.length) addFiles(e.dataTransfer.files); + }} + onClick={() => inputRef.current?.click()} + className={`rounded-xl border-2 border-dashed p-10 text-center cursor-pointer transition-colors ${ + dragOver ? 'border-primary bg-primary/10' : 'border-border bg-surface hover:bg-white/5' + }`} + > + <UploadCloud size={36} className={`mx-auto mb-3 ${dragOver ? 'text-primary' : 'text-zinc-500'}`} /> + <div className="text-[14px] text-white font-medium"> + Drop videos here or click to browse + </div> + <div className="text-[11px] text-zinc-500 mt-1"> + MP4 / MOV · up to 2 GB · up to {MAX_FILES} per batch + </div> + <input + ref={inputRef} + type="file" + accept="video/mp4,video/quicktime,.mp4,.mov" + multiple + className="hidden" + onChange={(e) => addFiles(e.target.files || [])} + /> + </div> + + {error && <div className="text-[12px] text-red-400">{error}</div>} + + {files.length > 0 && ( + <div className="space-y-2"> + <div className="text-[11px] uppercase tracking-wider text-zinc-500"> + {files.length} of {MAX_FILES} files + </div> + {files.map((f) => ( + <div key={f.id} className="flex items-center gap-3 rounded-lg border border-border bg-surface p-3"> + <FileVideo size={18} className="text-zinc-500 shrink-0" /> + <div className="flex-1 min-w-0"> + <div className="text-[13px] text-white truncate">{f.name}</div> + <div className="text-[11px] text-zinc-500">{fmtSize(f.size)}</div> + </div> + <button + onClick={() => removeFile(f.id)} + className="p-1.5 text-zinc-500 hover:text-red-400 transition-colors" + aria-label={`Remove ${f.name}`} + > + <X size={14} /> + </button> + </div> + ))} + </div> + )} + + <div className="flex items-center justify-between pt-4 border-t border-border"> + <span className="text-[11px] 
text-zinc-500"> + {files.length === 0 ? 'Add at least one file to continue.' : 'Ready for categorization.'} + </span> + <button + onClick={wizard.next} + disabled={files.length === 0} + className="btn-primary px-5 py-2 text-[13px] disabled:opacity-40 disabled:cursor-not-allowed" + > + Continue + </button> + </div> + </div> + </div> + ); +} From 95ca831e017c65802109af2335e9ccdfd0f6913f Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Tue, 19 May 2026 21:49:00 -0400 Subject: [PATCH 24/43] =?UTF-8?q?feat(ui):=20phase=204=20=E2=80=94=20long-?= =?UTF-8?q?form=204-step=20wizard=20+=20Dashboard?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Final phase of the UI/UX overhaul. Replaces the LongForm + Dashboard placeholders with a single-file 4-step wizard (Upload → Settings → Processing → Editor) and a real Dashboard that consumes the notifications store + local histories. Long-form wizard: - Reuses `useWizard` from Phase 3. - Step 1 Upload: single MP4/MOV up to 8 GB (4K), drag-drop or browse. - Step 2 Settings: 5 toggles — color grade, auto subtitles, chapter detection, description/tags, intro/outro. Each toggle is annotated with its backend TODO #. - Step 3 Processing: simulated 5-stage progress bar with `SnakeGame` on the side. The real pipeline branches (silence removal, LUT, chapter detection, intro/outro) are plan TODOs #4–#8; the timer here lets the rest of the wizard be exercised end-to-end. - Step 4 Editor: 16:9 video preview + chapter timeline scrubber + right-panel tabs (Chapters / Subtitles / Export). Chapters are seeded from Step 3 (placeholder until backend TODO #6 ships). Inline rename + seek-on-click. "Export segment as short" opens a modal that documents the pending /api/long-form/export-segment route (plan TODO #7). - History tab reads `openshorts.longForm.history` (written on processing complete). 
Dashboard: - Three StatCards: clips processed (sum of short-form clip counts + long-form edits), scheduled (notifications with status='scheduled'), published (notifications submitted/published). Deltas surface the next platform on deck and the latest publish. - Upcoming uploads panel: filtered notifications list with platform badges and timestamps. - Recent activity panel: last 8 notifications, any type. - All values derive locally — the live backend feed lands with plan TODO #10 (GET /api/clips/recent). Wiring: - App.jsx import switched to ./pages/LongForm/index.jsx; the old single-file LongForm.jsx is removed. - Build verified: 1616 modules, 1288 KB JS chunk. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- frontend/src/App.jsx | 2 +- frontend/src/pages/Dashboard.jsx | 152 ++++++++- frontend/src/pages/LongForm.jsx | 15 - frontend/src/pages/LongForm/History.jsx | 59 ++++ frontend/src/pages/LongForm/Wizard.jsx | 88 ++++++ frontend/src/pages/LongForm/index.jsx | 51 +++ frontend/src/pages/LongForm/steps/Editor.jsx | 293 ++++++++++++++++++ .../src/pages/LongForm/steps/Processing.jsx | 147 +++++++++ .../src/pages/LongForm/steps/Settings.jsx | 69 +++++ frontend/src/pages/LongForm/steps/Upload.jsx | 106 +++++++ 10 files changed, 957 insertions(+), 25 deletions(-) delete mode 100644 frontend/src/pages/LongForm.jsx create mode 100644 frontend/src/pages/LongForm/History.jsx create mode 100644 frontend/src/pages/LongForm/Wizard.jsx create mode 100644 frontend/src/pages/LongForm/index.jsx create mode 100644 frontend/src/pages/LongForm/steps/Editor.jsx create mode 100644 frontend/src/pages/LongForm/steps/Processing.jsx create mode 100644 frontend/src/pages/LongForm/steps/Settings.jsx create mode 100644 frontend/src/pages/LongForm/steps/Upload.jsx diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index 70c65972..0c0afc9f 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -2,7 +2,7 @@ import { Navigate, Route, Routes } from 
'react-router-dom'; import AppShell from './layouts/AppShell.jsx'; import Dashboard from './pages/Dashboard.jsx'; import ShortForm from './pages/ShortForm/index.jsx'; -import LongForm from './pages/LongForm.jsx'; +import LongForm from './pages/LongForm/index.jsx'; import ClipGenerator from './pages/ClipGenerator.jsx'; import SettingsLayout from './pages/Settings/index.jsx'; import BrandKitSection from './pages/Settings/sections/BrandKitSection.jsx'; diff --git a/frontend/src/pages/Dashboard.jsx b/frontend/src/pages/Dashboard.jsx index 19faa43c..3446ac31 100644 --- a/frontend/src/pages/Dashboard.jsx +++ b/frontend/src/pages/Dashboard.jsx @@ -1,14 +1,148 @@ -import PageStub from './PageStub.jsx'; +// Dashboard. At-a-glance counters + scheduled-uploads list + recent +// activity. All stats are derived locally from the notifications store + +// short/long-form histories. Once the backend ships GET /api/clips/recent +// (plan TODO #10) we can swap the counters to a live feed. + +import { useEffect, useMemo, useState } from 'react'; +import { Calendar, CheckCircle2, Clock, Film, ScrollText } from 'lucide-react'; +import { Link } from 'react-router-dom'; +import StatCard from '../components/ui/StatCard.jsx'; +import PlatformBadge from '../components/ui/PlatformBadge.jsx'; +import { useNotifications } from '../state/notificationsStore.js'; + +const SHORT_HISTORY_KEY = 'openshorts.shortForm.history'; +const LONG_HISTORY_KEY = 'openshorts.longForm.history'; + +function loadHistory(key) { + try { + const raw = localStorage.getItem(key); + if (!raw) return []; + const parsed = JSON.parse(raw); + return Array.isArray(parsed) ? 
parsed : []; + } catch { + return []; + } +} export default function Dashboard() { + const { items: notifications } = useNotifications(); + const [shortHistory, setShortHistory] = useState([]); + const [longHistory, setLongHistory] = useState([]); + + useEffect(() => { + setShortHistory(loadHistory(SHORT_HISTORY_KEY)); + setLongHistory(loadHistory(LONG_HISTORY_KEY)); + }, []); + + const clipsProcessed = useMemo( + () => shortHistory.reduce((sum, h) => sum + (h.clipCount || 0), 0) + + longHistory.length, + [shortHistory, longHistory], + ); + + const scheduled = useMemo( + () => notifications.filter((n) => n.status === 'scheduled'), + [notifications], + ); + + const published = useMemo( + () => notifications.filter((n) => n.status === 'submitted' || n.status === 'published'), + [notifications], + ); + + const recent = notifications.slice(0, 8); + return ( - <PageStub - title="Dashboard" - description="At-a-glance view of your content pipeline: clips processed, scheduled uploads, and published videos." - todo={[ - 'Phase 4: 3 stat cards (clips processed / scheduled / published)', - 'Phase 4: scheduled-uploads list with platform badges', - ]} - /> + <div className="h-full overflow-y-auto custom-scrollbar"> + <div className="p-6 max-w-6xl mx-auto space-y-6"> + <header> + <h1 className="text-[20px] font-semibold text-white">Dashboard</h1> + <p className="text-[13px] text-zinc-500 mt-1"> + At-a-glance view of your pipeline. Live backend feed lands with plan TODO #10. + </p> + </header> + + <section className="grid grid-cols-1 md:grid-cols-3 gap-4"> + <StatCard + label="Clips processed" + value={clipsProcessed} + tone="accent" + icon={Film} + delta={shortHistory.length > 0 ? `${shortHistory.length} short-form batch${shortHistory.length === 1 ? '' : 'es'}` : 'No batches yet'} + /> + <StatCard + label="Scheduled" + value={scheduled.length} + tone="default" + icon={Calendar} + delta={scheduled[0] ? 
`Next: ${scheduled[0].platform || 'unknown'}` : 'Nothing on deck'} + /> + <StatCard + label="Published" + value={published.length} + tone="success" + icon={CheckCircle2} + delta={published[0] ? `Latest: ${published[0].platform || 'unknown'}` : 'No publishes yet'} + /> + </section> + + <section className="grid grid-cols-1 lg:grid-cols-2 gap-4"> + <div className="rounded-xl border border-border bg-surface p-5"> + <div className="flex items-center justify-between mb-4"> + <h2 className="text-[14px] font-semibold text-white flex items-center gap-2"> + <Calendar size={14} className="text-primary" /> + Upcoming uploads + </h2> + <Link to="/short-form" className="text-[11px] text-primary hover:underline"> + Schedule new + </Link> + </div> + {scheduled.length === 0 ? ( + <p className="text-[12px] text-zinc-500">No scheduled uploads. Schedule a clip from the Review step of the short-form wizard.</p> + ) : ( + <ul className="space-y-2"> + {scheduled.slice(0, 6).map((n) => ( + <li key={n.id} className="flex items-center gap-3 rounded-lg border border-border bg-background/40 p-3"> + {n.platform && <PlatformBadge platform={n.platform} withLabel={false} size="sm" />} + <div className="flex-1 min-w-0"> + <div className="text-[12px] text-white truncate">{n.message || 'Scheduled clip'}</div> + <div className="text-[10px] text-zinc-500 mt-0.5 flex items-center gap-1"> + <Clock size={10} /> {new Date(n.ts).toLocaleString()} + </div> + </div> + </li> + ))} + </ul> + )} + </div> + + <div className="rounded-xl border border-border bg-surface p-5"> + <div className="flex items-center justify-between mb-4"> + <h2 className="text-[14px] font-semibold text-white flex items-center gap-2"> + <ScrollText size={14} className="text-primary" /> + Recent activity + </h2> + </div> + {recent.length === 0 ? 
( + <p className="text-[12px] text-zinc-500">No activity yet — publish your first clip and it’ll appear here.</p> + ) : ( + <ul className="space-y-2"> + {recent.map((n) => ( + <li key={n.id} className="flex items-center gap-3 rounded-lg border border-border bg-background/40 p-3"> + {n.platform && <PlatformBadge platform={n.platform} withLabel={false} size="sm" />} + <div className="flex-1 min-w-0"> + <div className="text-[12px] text-white truncate">{n.message || `${n.type} event`}</div> + <div className="text-[10px] text-zinc-500 mt-0.5"> + {n.status} · {new Date(n.ts).toLocaleString()} + </div> + </div> + </li> + ))} + </ul> + )} + </div> + </section> + </div> + </div> ); } diff --git a/frontend/src/pages/LongForm.jsx b/frontend/src/pages/LongForm.jsx deleted file mode 100644 index 5cabc024..00000000 --- a/frontend/src/pages/LongForm.jsx +++ /dev/null @@ -1,15 +0,0 @@ -import PageStub from './PageStub.jsx'; - -export default function LongForm() { - return ( - <PageStub - title="Long-form" - description="Process a single long-form video end-to-end: color grade, subtitles, chapter detection, and segment-to-short exports." - todo={[ - 'Phase 4: 4-step wizard (Upload → Settings → Processing → Editor)', - 'Phase 4: chapter timeline scrubber + inline chapter rename', - 'Phase 4: subtitle panel + Export segment as short', - ]} - /> - ); -} diff --git a/frontend/src/pages/LongForm/History.jsx b/frontend/src/pages/LongForm/History.jsx new file mode 100644 index 00000000..f0406765 --- /dev/null +++ b/frontend/src/pages/LongForm/History.jsx @@ -0,0 +1,59 @@ +// Past long-form edits. Reads from localStorage 'openshorts.longForm.history'. +// TODO(backend): plan TODO #10 — replace with GET /api/clips/recent once the +// backend grows a unified job index. 
+ +import { useEffect, useState } from 'react'; +import { Archive } from 'lucide-react'; + +const HISTORY_KEY = 'openshorts.longForm.history'; + +function loadHistory() { + try { + const raw = localStorage.getItem(HISTORY_KEY); + if (!raw) return []; + const parsed = JSON.parse(raw); + return Array.isArray(parsed) ? parsed : []; + } catch { + return []; + } +} + +export default function History() { + const [items, setItems] = useState([]); + + useEffect(() => { + setItems(loadHistory()); + }, []); + + if (items.length === 0) { + return ( + <div className="h-full flex flex-col items-center justify-center text-zinc-500 p-12"> + <Archive size={36} className="text-zinc-700 mb-3" /> + <div className="text-[14px] text-white font-medium">No long-form edits yet</div> + <p className="text-[12px] text-zinc-500 mt-1 text-center max-w-md"> + Each completed long-form edit will be saved here so you can re-open the editor or re-export segments without re-uploading the source. + </p> + </div> + ); + } + + return ( + <div className="p-6 space-y-3 overflow-y-auto custom-scrollbar h-full"> + {items.map((item) => ( + <div key={item.id} className="rounded-xl border border-border bg-surface p-4"> + <div className="flex items-center justify-between"> + <div className="min-w-0"> + <div className="text-[13px] font-medium text-white truncate">{item.title || 'Untitled edit'}</div> + <div className="text-[11px] text-zinc-500 mt-0.5"> + {new Date(item.ts).toLocaleString()} · {item.chapters || 0} chapter{item.chapters === 1 ? '' : 's'} + </div> + </div> + <span className="text-[10px] uppercase tracking-wider px-2 py-0.5 rounded border border-border bg-white/5 text-zinc-500 shrink-0 ml-3"> + Saved + </span> + </div> + </div> + ))} + </div> + ); +} diff --git a/frontend/src/pages/LongForm/Wizard.jsx b/frontend/src/pages/LongForm/Wizard.jsx new file mode 100644 index 00000000..aa503ad9 --- /dev/null +++ b/frontend/src/pages/LongForm/Wizard.jsx @@ -0,0 +1,88 @@ +// 4-step long-form wizard. 
Owns wizard state via useWizard; step +// components stay dumb. Mirrors the short-form layout but each step +// targets the long-form pipeline (chapter detection, segment exports). + +import { Check } from 'lucide-react'; +import { useWizard } from '../../hooks/useWizard.js'; +import Upload from './steps/Upload.jsx'; +import Settings from './steps/Settings.jsx'; +import Processing from './steps/Processing.jsx'; +import Editor from './steps/Editor.jsx'; + +const STEPS = [ + { id: 'upload', label: 'Upload' }, + { id: 'settings', label: 'Settings' }, + { id: 'processing', label: 'Processing', lock: true }, + { id: 'editor', label: 'Editor' }, +]; + +const INITIAL = { + file: null, // { id, file, name, size, durationSec? } + settings: { + colorGrade: true, + autoSubtitles: true, + chapterDetection: true, + descriptionTags: true, + introOutro: false, + }, + processing: { + progress: 0, // 0–100 (stubbed timer until backend ships) + status: 'idle', // idle | running | complete + }, + chapters: [], // [{ id, label, startSec, endSec }] +}; + +const STORAGE_KEY = 'openshorts.longForm.wizard'; + +export default function Wizard() { + const w = useWizard({ steps: STEPS, initialData: INITIAL, storageKey: STORAGE_KEY }); + + return ( + <div className="h-full flex flex-col"> + <StepIndicator wizard={w} /> + <div className="flex-1 overflow-hidden"> + {w.currentStep.id === 'upload' && <Upload wizard={w} />} + {w.currentStep.id === 'settings' && <Settings wizard={w} />} + {w.currentStep.id === 'processing' && <Processing wizard={w} />} + {w.currentStep.id === 'editor' && <Editor wizard={w} />} + </div> + </div> + ); +} + +function StepIndicator({ wizard }) { + return ( + <div className="px-6 py-4 border-b border-border bg-background shrink-0"> + <div className="flex items-center gap-3"> + {wizard.steps.map((s, i) => { + const active = i === wizard.step; + const done = i < wizard.step; + const reachable = i <= wizard.step && !wizard.isLocked; + return ( + <div key={s.id} 
className="flex items-center gap-3 flex-1"> + <button + onClick={() => reachable && wizard.goto(i)} + disabled={!reachable} + className={`flex items-center gap-2 disabled:cursor-not-allowed ${ + active ? 'text-white' : done ? 'text-zinc-300' : 'text-zinc-600' + }`} + > + <span className={`w-6 h-6 flex items-center justify-center rounded-full text-[11px] font-medium ${ + active ? 'bg-primary text-white' : + done ? 'bg-success/20 text-success border border-success/40' : + 'bg-white/5 text-zinc-500 border border-border' + }`}> + {done ? <Check size={12} /> : i + 1} + </span> + <span className="text-[12px]">{s.label}</span> + </button> + {i < wizard.steps.length - 1 && ( + <div className={`flex-1 h-px ${done ? 'bg-success/40' : 'bg-border'}`} /> + )} + </div> + ); + })} + </div> + </div> + ); +} diff --git a/frontend/src/pages/LongForm/index.jsx b/frontend/src/pages/LongForm/index.jsx new file mode 100644 index 00000000..0055b861 --- /dev/null +++ b/frontend/src/pages/LongForm/index.jsx @@ -0,0 +1,51 @@ +// Long-form workflow: single-file 4-step wizard plus a sibling History +// tab. App.jsx mounts this under /long-form/*; routing inside is +// router-local. + +import { NavLink, Outlet, Route, Routes } from 'react-router-dom'; +import Wizard from './Wizard.jsx'; +import History from './History.jsx'; + +function Shell() { + return ( + <div className="h-full flex flex-col"> + <div className="px-6 pt-5 pb-3 border-b border-border bg-background flex items-center gap-1 shrink-0"> + <NavLink + to="/long-form" + end + className={({ isActive }) => + `text-[13px] px-3 py-1.5 rounded-md transition-colors ${ + isActive ? 'bg-white/10 text-white' : 'text-zinc-400 hover:text-white' + }` + } + > + Wizard + </NavLink> + <NavLink + to="/long-form/history" + className={({ isActive }) => + `text-[13px] px-3 py-1.5 rounded-md transition-colors ${ + isActive ? 
'bg-white/10 text-white' : 'text-zinc-400 hover:text-white' + }` + } + > + History + </NavLink> + </div> + <div className="flex-1 overflow-hidden"> + <Outlet /> + </div> + </div> + ); +} + +export default function LongForm() { + return ( + <Routes> + <Route element={<Shell />}> + <Route index element={<Wizard />} /> + <Route path="history" element={<History />} /> + </Route> + </Routes> + ); +} diff --git a/frontend/src/pages/LongForm/steps/Editor.jsx b/frontend/src/pages/LongForm/steps/Editor.jsx new file mode 100644 index 00000000..efc45fda --- /dev/null +++ b/frontend/src/pages/LongForm/steps/Editor.jsx @@ -0,0 +1,293 @@ +// Step 4: Long-form editor. 16:9 preview + chapter timeline scrubber + +// right panel with Chapters / Subtitles / Export tabs. +// +// All real backend wiring is deferred (plan TODOs #6 chapter detection, +// #7 segment export). The chapter list seeded by Step 3 is a placeholder +// and "Export segment as short" surfaces a "coming soon" modal. + +import { useEffect, useMemo, useRef, useState } from 'react'; +import { Download, FileText, Layers, Scissors, X } from 'lucide-react'; + +const TABS = [ + { id: 'chapters', label: 'Chapters', icon: Layers }, + { id: 'subtitles', label: 'Subtitles', icon: FileText }, + { id: 'export', label: 'Export', icon: Download }, +]; + +function fmtTime(sec) { + if (!Number.isFinite(sec)) return '--:--'; + const m = Math.floor(sec / 60); + const s = Math.floor(sec % 60); + return `${m}:${String(s).padStart(2, '0')}`; +} + +export default function Editor({ wizard }) { + const file = wizard.data.file; + const chapters = wizard.data.chapters || []; + const [tab, setTab] = useState('chapters'); + const [activeChapter, setActiveChapter] = useState(chapters[0]?.id || null); + const [showExportModal, setShowExportModal] = useState(false); + const [sourceUrl, setSourceUrl] = useState(null); + const [durationSec, setDurationSec] = useState(null); + const videoRef = useRef(null); + + useEffect(() => { + if (!(file?.file 
instanceof File)) { setSourceUrl(null); return; } + const url = URL.createObjectURL(file.file); + setSourceUrl(url); + return () => URL.revokeObjectURL(url); + }, [file?.file]); + + // Derive the timeline duration: real video metadata wins, else last + // chapter, else a 10-minute fallback so the bars render. + const totalDuration = durationSec || chapters[chapters.length - 1]?.endSec || 600; + + function seekTo(sec) { + if (videoRef.current) { + videoRef.current.currentTime = sec; + videoRef.current.play().catch(() => {}); + } + } + + function selectChapter(c) { + setActiveChapter(c.id); + seekTo(c.startSec); + } + + function renameChapter(id, label) { + wizard.setData({ + chapters: chapters.map((c) => c.id === id ? { ...c, label } : c), + }); + } + + if (!sourceUrl) { + return ( + <div className="h-full flex flex-col items-center justify-center p-12 text-center text-zinc-500"> + <p className="text-[14px] text-white font-medium">No source file available.</p> + <p className="text-[12px] mt-1">Re-upload to re-run the wizard.</p> + <button onClick={wizard.reset} className="mt-4 btn-primary px-4 py-2 text-[13px]"> + Start over + </button> + </div> + ); + } + + return ( + <div className="h-full flex"> + <div className="flex-1 flex flex-col bg-background overflow-hidden"> + <div className="flex-1 flex items-center justify-center p-6 overflow-hidden"> + <div className="w-full max-w-4xl"> + <div className="bg-black border border-border rounded-lg overflow-hidden aspect-video"> + <video + ref={videoRef} + src={sourceUrl} + controls + onLoadedMetadata={(e) => setDurationSec(e.currentTarget.duration)} + className="w-full h-full object-contain" + /> + </div> + </div> + </div> + + <ChapterTimeline + chapters={chapters} + totalDuration={totalDuration} + active={activeChapter} + onSelect={selectChapter} + /> + </div> + + <aside className="w-[320px] shrink-0 border-l border-border bg-surface flex flex-col"> + <div className="border-b border-border flex items-center"> + 
{TABS.map((t) => { + const Icon = t.icon; + const active = tab === t.id; + return ( + <button + key={t.id} + onClick={() => setTab(t.id)} + className={`flex-1 flex items-center justify-center gap-2 py-3 text-[12px] transition-colors ${ + active ? 'text-white bg-white/5 border-b-2 border-primary' : 'text-zinc-400 hover:text-white' + }`} + > + <Icon size={12} /> + {t.label} + </button> + ); + })} + </div> + + <div className="flex-1 overflow-y-auto custom-scrollbar p-4"> + {tab === 'chapters' && ( + <ChaptersPanel + chapters={chapters} + activeId={activeChapter} + onSelect={selectChapter} + onRename={renameChapter} + onExportSegment={() => setShowExportModal(true)} + /> + )} + {tab === 'subtitles' && <SubtitlesPanel />} + {tab === 'export' && <ExportPanel onSegmentClick={() => setShowExportModal(true)} />} + </div> + </aside> + + {showExportModal && ( + <SegmentExportModal onClose={() => setShowExportModal(false)} /> + )} + </div> + ); +} + +function ChapterTimeline({ chapters, totalDuration, active, onSelect }) { + return ( + <div className="border-t border-border bg-surface px-4 py-3 shrink-0"> + <div className="flex items-center justify-between mb-2"> + <span className="text-[11px] uppercase tracking-wider text-zinc-500">Chapters</span> + <span className="text-[11px] text-zinc-500 font-mono">{fmtTime(totalDuration)}</span> + </div> + <div className="relative h-7 bg-white/5 rounded-md overflow-hidden"> + {chapters.map((c) => { + const left = (c.startSec / totalDuration) * 100; + const width = ((c.endSec - c.startSec) / totalDuration) * 100; + const isActive = c.id === active; + return ( + <button + key={c.id} + onClick={() => onSelect(c)} + className={`absolute top-0 bottom-0 border-r border-background transition-colors ${ + isActive ? 
'bg-primary/50' : 'bg-primary/15 hover:bg-primary/30' + }`} + style={{ left: `${left}%`, width: `${width}%` }} + title={`${c.label} (${fmtTime(c.startSec)} – ${fmtTime(c.endSec)})`} + > + <span className="absolute inset-0 flex items-center justify-center text-[10px] text-white truncate px-2"> + {c.label} + </span> + </button> + ); + })} + </div> + </div> + ); +} + +function ChaptersPanel({ chapters, activeId, onSelect, onRename, onExportSegment }) { + return ( + <div className="space-y-2"> + <p className="text-[11px] text-zinc-500 mb-3"> + Click a chapter to seek the preview. Rename inline. Export any chapter as a vertical short. + </p> + {chapters.map((c) => { + const active = c.id === activeId; + return ( + <div + key={c.id} + className={`rounded-lg border p-3 transition-colors ${ + active ? 'border-primary bg-primary/10' : 'border-border hover:bg-white/5' + }`} + > + <div className="flex items-center justify-between gap-2 mb-1"> + <input + value={c.label} + onChange={(e) => onRename(c.id, e.target.value)} + onFocus={() => onSelect(c)} + className="bg-transparent text-[13px] text-white font-medium flex-1 min-w-0 focus:outline-none" + /> + <button + onClick={onExportSegment} + className="shrink-0 text-[10px] px-2 py-0.5 rounded border border-primary/40 text-primary hover:bg-primary/10" + > + Export + </button> + </div> + <div className="text-[10px] font-mono text-zinc-500"> + {fmtTime(c.startSec)} – {fmtTime(c.endSec)} + </div> + </div> + ); + })} + {chapters.length === 0 && ( + <p className="text-[11px] text-zinc-500 italic">No chapters detected. Backend TODO #6.</p> + )} + </div> + ); +} + +function SubtitlesPanel() { + return ( + <div className="space-y-3 text-[12px]"> + <p className="text-[11px] text-zinc-500"> + Edit transcribed lines, retime, restyle, and re-export with the brand-kit subtitle style. 
+ </p> + <div className="rounded-lg border border-border bg-background/40 p-4 text-center text-zinc-500 text-[11px]"> + Subtitle editor lands with backend transcript endpoint hookup. + <br /> + See plan TODO #6 + the existing /api/subtitle route. + </div> + </div> + ); +} + +function ExportPanel({ onSegmentClick }) { + return ( + <div className="space-y-3 text-[12px]"> + <p className="text-[11px] text-zinc-500"> + Export the full long-form edit or any single chapter as a vertical short. + </p> + <button + disabled + title="Long-form export — coming soon" + className="w-full px-3 py-2 text-[12px] rounded-md border border-border text-zinc-500 cursor-not-allowed flex items-center gap-2" + > + <Download size={12} /> Download long-form + </button> + <button + onClick={onSegmentClick} + className="w-full btn-primary px-3 py-2 text-[12px] flex items-center gap-2" + > + <Scissors size={12} /> Export segment as short + </button> + </div> + ); +} + +function SegmentExportModal({ onClose }) { + return ( + <div + className="fixed inset-0 z-50 flex items-center justify-center bg-black/60 backdrop-blur-sm" + onClick={onClose} + > + <div + className="bg-surface border border-border rounded-xl p-6 max-w-md w-full mx-4 shadow-2xl" + onClick={(e) => e.stopPropagation()} + > + <div className="flex items-center justify-between mb-3"> + <h2 className="text-[15px] font-semibold text-white flex items-center gap-2"> + <Scissors size={16} className="text-primary" /> + Export segment as short + </h2> + <button onClick={onClose} className="text-zinc-500 hover:text-white"> + <X size={14} /> + </button> + </div> + <p className="text-[13px] text-zinc-400 leading-relaxed"> + This will rerun the vertical-reframing pipeline on the selected chapter range and surface the result in the short-form Review step. 
+ </p> + <div className="mt-4 rounded-lg border border-amber-500/30 bg-amber-500/10 p-3 text-[12px] text-amber-200"> + Backend route not implemented yet — see plan TODO #7 + (<code className="text-amber-300">POST /api/long-form/export-segment</code>). + The UI is wired so the button works the moment the route ships. + </div> + <div className="flex justify-end gap-2 mt-5"> + <button + onClick={onClose} + className="text-[13px] text-zinc-400 hover:text-white px-3 py-2 rounded-md" + > + Close + </button> + </div> + </div> + </div> + ); +} diff --git a/frontend/src/pages/LongForm/steps/Processing.jsx b/frontend/src/pages/LongForm/steps/Processing.jsx new file mode 100644 index 00000000..51826a07 --- /dev/null +++ b/frontend/src/pages/LongForm/steps/Processing.jsx @@ -0,0 +1,147 @@ +// Step 3: Long-form processing. The chapter-detection / segmentation +// pipeline isn't wired yet (plan TODOs #4–#8), so this step runs a +// simulated progress bar — the SnakeGame on the side handles the wait. +// +// TODO(backend): plan TODOs #4 (silence removal), #5 (LUT color grade), +// #6 (chapter detection), #7 (segment export), #8 (intro/outro). Replace +// the timer with a real polling loop once those routes ship. + +import { useEffect, useRef } from 'react'; +import { CheckCircle2, Loader2 } from 'lucide-react'; +import SnakeGame from '../../../components/ui/SnakeGame.jsx'; + +const STAGES = [ + { id: 'transcribe', label: 'Transcribing', until: 25 }, + { id: 'scenes', label: 'Detecting chapters', until: 55 }, + { id: 'grade', label: 'Applying color grade', until: 75 }, + { id: 'subs', label: 'Generating subtitles', until: 90 }, + { id: 'finalize', label: 'Finalizing', until: 100 }, +]; + +const HISTORY_KEY = 'openshorts.longForm.history'; + +function saveHistory(entry) { + try { + const raw = localStorage.getItem(HISTORY_KEY); + const list = raw ? 
JSON.parse(raw) : []; + list.unshift(entry); + localStorage.setItem(HISTORY_KEY, JSON.stringify(list.slice(0, 50))); + } catch {/* ignore */} +} + +export default function Processing({ wizard }) { + const proc = wizard.data.processing || { progress: 0, status: 'idle' }; + const file = wizard.data.file; + const startedRef = useRef(false); + const savedRef = useRef(false); + + // Drive the fake progress timer once. + useEffect(() => { + if (startedRef.current) return; + startedRef.current = true; + if (proc.status === 'complete') return; + wizard.setData((prev) => ({ ...prev, processing: { progress: 0, status: 'running' } })); + + const id = setInterval(() => { + wizard.setData((prev) => { + const cur = prev.processing?.progress ?? 0; + const next = Math.min(100, cur + 2 + Math.random() * 2); + if (next >= 100) { + clearInterval(id); + return { ...prev, processing: { progress: 100, status: 'complete' } }; + } + return { ...prev, processing: { progress: next, status: 'running' } }; + }); + }, 200); + return () => clearInterval(id); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + // When complete, persist a history entry once and seed chapters. + useEffect(() => { + if (proc.status !== 'complete' || savedRef.current) return; + savedRef.current = true; + saveHistory({ + id: `${Date.now()}-${Math.random().toString(36).slice(2, 6)}`, + ts: Date.now(), + title: file?.name ? `Edit: ${file.name}` : 'Untitled edit', + chapters: 3, + }); + // Placeholder chapters (real ones come with backend TODO #6). 
+ if (!wizard.data.chapters?.length) { + wizard.setData({ + chapters: [ + { id: 'c1', label: 'Intro', startSec: 0, endSec: 60 }, + { id: 'c2', label: 'Main', startSec: 60, endSec: 540 }, + { id: 'c3', label: 'Outro', startSec: 540, endSec: 600 }, + ], + }); + } + }, [proc.status, file, wizard]); + + const currentStage = STAGES.find((s) => proc.progress <= s.until) || STAGES[STAGES.length - 1]; + + return ( + <div className="h-full overflow-y-auto custom-scrollbar"> + <div className="p-6 max-w-5xl mx-auto grid grid-cols-1 lg:grid-cols-2 gap-6"> + <section> + <h1 className="text-[18px] font-semibold text-white">Processing</h1> + <p className="text-[13px] text-zinc-500 mt-1 mb-4"> + Chapter detection and long-form pipeline branches are stubbed — + backend TODOs #4–#8. The progress here is simulated. + </p> + + <div className="rounded-xl border border-border bg-surface p-5 space-y-4"> + <div className="flex items-center justify-between"> + <span className="text-[13px] text-white flex items-center gap-2"> + {proc.status === 'complete' + ? <CheckCircle2 size={16} className="text-success" /> + : <Loader2 size={16} className="text-primary animate-spin" />} + {proc.status === 'complete' ? 'Complete' : currentStage.label} + </span> + <span className="text-[11px] font-mono text-zinc-500">{Math.round(proc.progress)}%</span> + </div> + <div className="h-1.5 bg-white/5 rounded-full overflow-hidden"> + <div + className="h-full bg-primary transition-all" + style={{ width: `${proc.progress}%` }} + /> + </div> + <ul className="text-[11px] text-zinc-500 space-y-1"> + {STAGES.map((s) => ( + <li key={s.id} className="flex items-center gap-2"> + <span className={`w-1.5 h-1.5 rounded-full ${ + proc.progress >= s.until ? 'bg-success' : + proc.progress + 5 >= s.until ? 
'bg-primary' : 'bg-zinc-700' + }`} /> + {s.label} + </li> + ))} + </ul> + </div> + + <div className="flex items-center justify-between mt-6 pt-4 border-t border-border"> + <span className="text-[11px] text-zinc-500"> + {proc.status === 'complete' ? 'Ready to edit.' : 'Hang tight — try Snake while you wait.'} + </span> + <button + onClick={() => wizard.goto(3)} + disabled={proc.status !== 'complete'} + className="btn-primary px-5 py-2 text-[13px] disabled:opacity-40 disabled:cursor-not-allowed" + > + Open editor → + </button> + </div> + </section> + + <aside className="rounded-xl border border-border bg-surface p-5"> + <div className="mb-3"> + <h2 className="text-[14px] font-semibold text-white">Pass the time</h2> + <p className="text-[12px] text-zinc-500 mt-0.5">Render times scale with clip length — keep your hands busy.</p> + </div> + <SnakeGame /> + </aside> + </div> + </div> + ); +} diff --git a/frontend/src/pages/LongForm/steps/Settings.jsx b/frontend/src/pages/LongForm/steps/Settings.jsx new file mode 100644 index 00000000..fcd8e68f --- /dev/null +++ b/frontend/src/pages/LongForm/steps/Settings.jsx @@ -0,0 +1,69 @@ +// Step 2: Long-form processing settings. + +const TOGGLES = [ + { id: 'colorGrade', label: 'Color grade', hint: 'Apply a cinematic LUT (backend TODO #5).' }, + { id: 'autoSubtitles', label: 'Auto subtitles', hint: 'Transcribe + burn captions with brand-kit style.' }, + { id: 'chapterDetection', label: 'Chapter detection', hint: 'Run PySceneDetect for chapter markers (backend TODO #6).' }, + { id: 'descriptionTags', label: 'Description + tags', hint: 'Generate YouTube description and tag suggestions.' }, + { id: 'introOutro', label: 'Intro / outro', hint: 'Splice a brand-kit intro and outro (backend TODO #8).' 
}, +]; + +export default function Settings({ wizard }) { + const settings = wizard.data.settings || {}; + const file = wizard.data.file; + + function toggle(key) { + wizard.setData({ settings: { ...settings, [key]: !settings[key] } }); + } + + return ( + <div className="h-full overflow-y-auto custom-scrollbar"> + <div className="p-6 max-w-3xl mx-auto space-y-6"> + <header> + <h1 className="text-[18px] font-semibold text-white">Settings</h1> + <p className="text-[13px] text-zinc-500 mt-1"> + Pick what runs during processing. Each setting maps to a feature the editor exposes in Step 4. + </p> + </header> + + {file && ( + <div className="rounded-lg border border-border bg-surface p-3 flex items-center gap-3"> + <div className="flex-1 min-w-0"> + <div className="text-[12px] text-zinc-500">Source</div> + <div className="text-[13px] text-white truncate">{file.name}</div> + </div> + </div> + )} + + <section className="rounded-xl border border-border bg-surface p-5 space-y-3"> + <h2 className="text-[12px] uppercase tracking-wider text-zinc-500">Pipeline</h2> + <div className="grid grid-cols-1 md:grid-cols-2 gap-3"> + {TOGGLES.map((t) => ( + <label key={t.id} className="flex items-start gap-3 rounded-lg border border-border p-3 cursor-pointer hover:bg-white/5"> + <input + type="checkbox" + checked={!!settings[t.id]} + onChange={() => toggle(t.id)} + className="mt-1 accent-primary" + /> + <div> + <div className="text-[13px] text-white">{t.label}</div> + <div className="text-[11px] text-zinc-500 mt-0.5">{t.hint}</div> + </div> + </label> + ))} + </div> + </section> + + <div className="flex items-center justify-between pt-4 border-t border-border"> + <button onClick={wizard.back} className="text-[13px] text-zinc-400 hover:text-white transition-colors"> + ← Back + </button> + <button onClick={wizard.next} className="btn-primary px-5 py-2 text-[13px]"> + Start processing → + </button> + </div> + </div> + </div> + ); +} diff --git a/frontend/src/pages/LongForm/steps/Upload.jsx 
b/frontend/src/pages/LongForm/steps/Upload.jsx new file mode 100644 index 00000000..b87146ee --- /dev/null +++ b/frontend/src/pages/LongForm/steps/Upload.jsx @@ -0,0 +1,106 @@ +// Step 1: Upload one long-form source file. MP4/MOV up to 4K (cap to 8 GB). + +import { useRef, useState } from 'react'; +import { FileVideo, UploadCloud, X } from 'lucide-react'; + +const MAX_SIZE_BYTES = 8 * 1024 * 1024 * 1024; +const ALLOWED_TYPES = ['video/mp4', 'video/quicktime']; + +function fmtSize(bytes) { + if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(0)} KB`; + if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; + return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)} GB`; +} + +function nextId() { return `${Date.now()}-${Math.random().toString(36).slice(2, 7)}`; } + +export default function Upload({ wizard }) { + const inputRef = useRef(null); + const [dragOver, setDragOver] = useState(false); + const [error, setError] = useState(''); + + const file = wizard.data.file; + + function accept(f) { + setError(''); + const okType = ALLOWED_TYPES.includes(f.type) || /\.(mp4|mov)$/i.test(f.name); + if (!okType) { setError(`${f.name}: only MP4 / MOV files.`); return; } + if (f.size > MAX_SIZE_BYTES) { setError(`${f.name}: over 8 GB.`); return; } + wizard.setData({ file: { id: nextId(), file: f, name: f.name, size: f.size } }); + } + + return ( + <div className="h-full overflow-y-auto custom-scrollbar"> + <div className="p-6 max-w-3xl mx-auto space-y-6"> + <header> + <h1 className="text-[18px] font-semibold text-white">Upload long-form video</h1> + <p className="text-[13px] text-zinc-500 mt-1"> + One source file. MP4 or MOV at up to 4K — the editor handles chapter detection, segment exports, and intro/outro insertion. 
+ </p> + </header> + + <div + onDragOver={(e) => { e.preventDefault(); setDragOver(true); }} + onDragLeave={() => setDragOver(false)} + onDrop={(e) => { + e.preventDefault(); + setDragOver(false); + const f = e.dataTransfer.files?.[0]; + if (f) accept(f); + }} + onClick={() => inputRef.current?.click()} + className={`rounded-xl border-2 border-dashed p-10 text-center cursor-pointer transition-colors ${ + dragOver ? 'border-primary bg-primary/10' : 'border-border bg-surface hover:bg-white/5' + }`} + > + <UploadCloud size={36} className={`mx-auto mb-3 ${dragOver ? 'text-primary' : 'text-zinc-500'}`} /> + <div className="text-[14px] text-white font-medium"> + Drop a video here or click to browse + </div> + <div className="text-[11px] text-zinc-500 mt-1"> + MP4 / MOV · up to 8 GB · 4K supported + </div> + <input + ref={inputRef} + type="file" + accept="video/mp4,video/quicktime,.mp4,.mov" + className="hidden" + onChange={(e) => { const f = e.target.files?.[0]; if (f) accept(f); }} + /> + </div> + + {error && <div className="text-[12px] text-red-400">{error}</div>} + + {file && ( + <div className="flex items-center gap-3 rounded-lg border border-border bg-surface p-3"> + <FileVideo size={18} className="text-zinc-500 shrink-0" /> + <div className="flex-1 min-w-0"> + <div className="text-[13px] text-white truncate">{file.name}</div> + <div className="text-[11px] text-zinc-500">{fmtSize(file.size)}</div> + </div> + <button + onClick={() => wizard.setData({ file: null })} + className="p-1.5 text-zinc-500 hover:text-red-400 transition-colors" + aria-label={`Remove ${file.name}`} + > + <X size={14} /> + </button> + </div> + )} + + <div className="flex items-center justify-between pt-4 border-t border-border"> + <span className="text-[11px] text-zinc-500"> + {!file ? 'Add a video to continue.' 
: 'Ready for settings.'} + </span> + <button + onClick={wizard.next} + disabled={!file} + className="btn-primary px-5 py-2 text-[13px] disabled:opacity-40 disabled:cursor-not-allowed" + > + Continue + </button> + </div> + </div> + </div> + ); +} From 43c2d962f6d690d59f882a1d808cff7019215456 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Tue, 19 May 2026 22:23:22 -0400 Subject: [PATCH 25/43] fix(smoke-test): runtime bugs + Codex H1/H2/M3 remediation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surfaced by the first browser smoke test after the 4-phase UI overhaul (npm run build was green but never exercised the running app), plus follow-ups for the Codex adversarial review's HIGH/MEDIUM findings. Bugs caught by smoke test: - backend/app/main.py: run_job invoked `python -u main.py` which no longer exists post-restructure. Container log: "python: can't open file '/app/main.py'". Switched to `python -u -m app.cli`. Without this, every short-form Processing job exits with code 2. - frontend/src/pages/LongForm/steps/Processing.jsx: under React StrictMode (dev) the `startedRef` gate paired with the cleanup `clearInterval` caused mount #1 to start the timer, cleanup to clear it, and mount #2 to bail early — simulated progress stuck at 0%. Removed the gate; idempotent setData prevents double-reset. - frontend/src/components/ProcessingAnimation.jsx:228-229: two unescaped `>` chars in JSX text → `{'>'}`. Removes the only build warnings. Codex adversarial review remediation: - H1 / backend/app/main.py (input validation for STATE-MUTATING /api/process): new `_ensure_video_upload(filename, first_chunk)` rejects on extension (.mp4/.mov) and on missing MP4/MOV `ftyp` signature at byte offset 4. Validation runs before any disk write, so junk uploads no longer reach the pipeline. Returns 415 with a precise reason. 
Verified: text-content-with-.mp4-extension → 415 ftyp; real-mp4-with-.txt-extension → 415 ext; real .mp4 → 200. - H2 / frontend/src/hooks/useWizard.js + both Wizard.jsx callers: new optional `resetOnRehydrate(mergedData)` predicate. When it returns true (e.g. wizard state references a File that no longer survives JSON), the rehydrate force-resets to step 0 with initialData and clears localStorage. Eliminates the stranded-state bug where users could navigate past Categorize/Settings into a step that always fails. - M3 / frontend/src/pages/ShortForm/steps/Processing.jsx: polling effect now uses an AbortController + cancelled flag, fetchStatus accepts a signal, and the setData updater skips writes when the job has already moved to 'complete' or 'error'. Drops stale 'processing' responses that race past newer terminal updates. Tests / verification: - frontend npm run build: clean, 0 warnings (down from 2). - backend pytest -m "not e2e": 61/62 pass (test_openapi_dump_matches _baseline drifts on pydantic-emitted contentMediaType vs the baseline's format:binary — pre-existing, unrelated to these diffs, no route added/removed). - manual smoke: all 5 sidebar pages, all 4 legacy routes, short-form end-to-end (POST /api/process now succeeds; transcription runs; fails at Gemini with dummy key as expected), long-form end-to-end (simulated progress reaches 100%, Editor opens, Export modal works). security_baseline: applies: true surfaces: - id: POST /api/process (this change) tier: STATE-MUTATING controls: C3_input: { status: covered, mechanism: "extension + ftyp magic-bytes check before disk write" } C1_auth: { status: covered, mechanism: "X-Gemini-Key header (BYOK)" } C2_rate_limit: { status: opted_out, justification: "self-hosted single-tenant deployment; per-IP cap on the host process queue (MAX_CONCURRENT_JOBS) is the effective ceiling. Tracking: full rate-limit pass under /gsd-secure-phase." 
} C4_timeout: { status: opted_out, justification: "subprocess is intentionally unbounded — clip generation legitimately takes 5-60min. Tracking: kill switch via abuse detection in /gsd-secure-phase." } C7_idempotency: { status: opted_out, justification: "client retries would re-submit a fresh job_id; dedup is at user discretion. Tracking: idempotency-key in /gsd-secure-phase." } C8_concurrency: { status: covered, mechanism: "asyncio.Semaphore(MAX_CONCURRENT_JOBS) gate in process_queue" } C9_audit: { status: covered, mechanism: "attestation log line with IP + UA + timestamp + source per job" } C10_abuse: { status: opted_out, justification: "BYOK — cost is on the user's Gemini account, not the host. Tracking: per-user spend cap if multi-tenant ever ships." } Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- backend/app/main.py | 48 +++++++++++-- .../src/components/ProcessingAnimation.jsx | 4 +- frontend/src/hooks/useWizard.js | 17 +++-- frontend/src/pages/LongForm/Wizard.jsx | 14 +++- .../src/pages/LongForm/steps/Processing.jsx | 14 ++-- frontend/src/pages/ShortForm/Wizard.jsx | 17 ++++- .../src/pages/ShortForm/steps/Processing.jsx | 70 ++++++++++++------- 7 files changed, 141 insertions(+), 43 deletions(-) diff --git a/backend/app/main.py b/backend/app/main.py index 0c904a33..729b538e 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -310,6 +310,32 @@ async def run_job(job_id, job_data): jobs[job_id]['status'] = 'failed' jobs[job_id]['logs'].append(f"Execution error: {str(e)}") +_ALLOWED_VIDEO_EXTS = {".mp4", ".mov"} +# MP4 / MOV files start with a 'ftyp' box at byte offset 4. +# Reference: ISO/IEC 14496-12 (MP4 container spec). +_MP4_FTYP_MAGIC = b"ftyp" + + +def _ensure_video_upload(filename: str, first_chunk: bytes) -> None: + """Reject uploads that aren't MP4/MOV by extension AND magic-byte check. 
+ + Browsers send inconsistent Content-Type for video files (often + application/octet-stream), so MIME is intentionally not checked — + the extension + ftyp signature pair is the authoritative test. + """ + ext = os.path.splitext((filename or "").lower())[1] + if ext not in _ALLOWED_VIDEO_EXTS: + raise HTTPException( + status_code=415, + detail=f"Unsupported file type {ext!r}. Allowed: {sorted(_ALLOWED_VIDEO_EXTS)}", + ) + if len(first_chunk) < 12 or first_chunk[4:8] != _MP4_FTYP_MAGIC: + raise HTTPException( + status_code=415, + detail="File contents do not match MP4/MOV format (ftyp signature missing).", + ) + + @app.get("/api/config") async def get_config(): return {"youtubeUrlEnabled": not DISABLE_YOUTUBE_URL} @@ -362,24 +388,36 @@ async def process_endpoint( os.makedirs(job_output_dir, exist_ok=True) # Prepare Command - cmd = ["python", "-u", "main.py"] # -u for unbuffered + cmd = ["python", "-u", "-m", "app.cli"] # -u for unbuffered; CLI lives at backend/app/cli.py post-restructure env = os.environ.copy() env["GEMINI_API_KEY"] = api_key # Override with key from request if url: cmd.extend(["-u", url]) else: - # Save uploaded file with size limit check + # Save uploaded file with size + signature checks. input_path = os.path.join(UPLOAD_DIR, f"{job_id}_{file.filename}") - - # Read file in chunks to check size - size = 0 limit_bytes = MAX_FILE_SIZE_MB * 1024 * 1024 + # Read the first chunk and validate the signature before persisting + # anything to disk. Empty / wrong-type uploads are rejected early. 
+ first_chunk = await file.read(1024 * 1024) + if not first_chunk: + shutil.rmtree(job_output_dir, ignore_errors=True) + raise HTTPException(status_code=400, detail="Uploaded file is empty.") + try: + _ensure_video_upload(file.filename or "", first_chunk) + except HTTPException: + shutil.rmtree(job_output_dir, ignore_errors=True) + raise + + size = len(first_chunk) with open(input_path, "wb") as buffer: + buffer.write(first_chunk) while content := await file.read(1024 * 1024): # Read 1MB chunks size += len(content) if size > limit_bytes: + buffer.close() os.remove(input_path) shutil.rmtree(job_output_dir) raise HTTPException(status_code=413, detail=f"File too large. Max size {MAX_FILE_SIZE_MB}MB") diff --git a/frontend/src/components/ProcessingAnimation.jsx b/frontend/src/components/ProcessingAnimation.jsx index c8ddec14..7292b537 100644 --- a/frontend/src/components/ProcessingAnimation.jsx +++ b/frontend/src/components/ProcessingAnimation.jsx @@ -225,8 +225,8 @@ const ProcessingAnimation = ({ media, isComplete, syncedTime, isSyncedPlaying, s {!isSyncedPlaying && !isComplete && ( <div className="absolute bottom-0 left-0 right-0 p-4 bg-gradient-to-t from-black/90 to-transparent z-30 flex justify-between items-end border-t border-white/5"> <div className="font-mono text-[10px] text-primary/80 space-y-1"> - <div className="flex items-center gap-2"><Activity size={10} className="animate-bounce" /> > ANALYSIS_THREAD_01: ACTIVE</div> - <div className="flex items-center gap-2"><Radio size={10} /> > AUDIO_TRANSCRIPT: PROCESSING</div> + <div className="flex items-center gap-2"><Activity size={10} className="animate-bounce" /> {'>'} ANALYSIS_THREAD_01: ACTIVE</div> + <div className="flex items-center gap-2"><Radio size={10} /> {'>'} AUDIO_TRANSCRIPT: PROCESSING</div> </div> <div className="flex gap-1"> <div className="w-1 h-3 bg-primary/40 animate-[pulse_0.5s_infinite]"></div> diff --git a/frontend/src/hooks/useWizard.js b/frontend/src/hooks/useWizard.js index 
b23932b6..c1426a5f 100644 --- a/frontend/src/hooks/useWizard.js +++ b/frontend/src/hooks/useWizard.js @@ -39,7 +39,7 @@ function reducer(state, action) { } } -export function useWizard({ steps, initialData = {}, storageKey = null }) { +export function useWizard({ steps, initialData = {}, storageKey = null, resetOnRehydrate = null }) { const maxStep = steps.length - 1; const initial = useRef({ @@ -51,8 +51,9 @@ export function useWizard({ steps, initialData = {}, storageKey = null }) { const [state, dispatch] = useReducer(reducer, initial.current); // Rehydrate once from localStorage. File objects don't survive JSON - // round-trips, so callers should treat persisted File fields as "may be - // missing on reload" and recover gracefully. + // round-trips. If `resetOnRehydrate(mergedData)` returns true, force + // step=0 + initialData and clear persistence — keeps users from + // marching past lost state into a step that will fail. useEffect(() => { if (!storageKey) return; try { @@ -60,14 +61,20 @@ export function useWizard({ steps, initialData = {}, storageKey = null }) { if (!raw) return; const saved = JSON.parse(raw); if (saved && typeof saved.step === 'number') { + const merged = { ...initialData, ...(saved.data || {}) }; + const corrupt = + saved.step > 0 + && typeof resetOnRehydrate === 'function' + && resetOnRehydrate(merged); dispatch({ type: 'REHYDRATE', state: { - step: Math.min(saved.step, maxStep), - data: { ...initialData, ...(saved.data || {}) }, + step: corrupt ? 0 : Math.min(saved.step, maxStep), + data: corrupt ? 
initialData : merged, maxStep, }, }); + if (corrupt) localStorage.removeItem(storageKey); } } catch {/* ignore */} // eslint-disable-next-line react-hooks/exhaustive-deps diff --git a/frontend/src/pages/LongForm/Wizard.jsx b/frontend/src/pages/LongForm/Wizard.jsx index aa503ad9..8a770831 100644 --- a/frontend/src/pages/LongForm/Wizard.jsx +++ b/frontend/src/pages/LongForm/Wizard.jsx @@ -34,8 +34,20 @@ const INITIAL = { const STORAGE_KEY = 'openshorts.longForm.wizard'; +// File objects can't be JSON-serialized; after a reload `data.file.file` +// is a plain object instead of a real File and Settings/Processing/Editor +// would all fail. Force the wizard back to Upload in that case. +function longFormNeedsFreshUpload(data) { + return !!data?.file && !(data.file.file instanceof File); +} + export default function Wizard() { - const w = useWizard({ steps: STEPS, initialData: INITIAL, storageKey: STORAGE_KEY }); + const w = useWizard({ + steps: STEPS, + initialData: INITIAL, + storageKey: STORAGE_KEY, + resetOnRehydrate: longFormNeedsFreshUpload, + }); return ( <div className="h-full flex flex-col"> diff --git a/frontend/src/pages/LongForm/steps/Processing.jsx b/frontend/src/pages/LongForm/steps/Processing.jsx index 51826a07..ef4c4676 100644 --- a/frontend/src/pages/LongForm/steps/Processing.jsx +++ b/frontend/src/pages/LongForm/steps/Processing.jsx @@ -32,15 +32,19 @@ function saveHistory(entry) { export default function Processing({ wizard }) { const proc = wizard.data.processing || { progress: 0, status: 'idle' }; const file = wizard.data.file; - const startedRef = useRef(false); const savedRef = useRef(false); - // Drive the fake progress timer once. + // Drive the fake progress timer. The cleanup intentionally clears its own + // interval — under React 18 StrictMode dev the effect runs twice, but each + // run owns its own timer and the setData updater reads the latest progress + // so the visible counter never flickers backward. 
useEffect(() => { - if (startedRef.current) return; - startedRef.current = true; if (proc.status === 'complete') return; - wizard.setData((prev) => ({ ...prev, processing: { progress: 0, status: 'running' } })); + wizard.setData((prev) => { + const existing = prev.processing; + if (existing?.status === 'running' || existing?.status === 'complete') return prev; + return { ...prev, processing: { progress: 0, status: 'running' } }; + }); const id = setInterval(() => { wizard.setData((prev) => { diff --git a/frontend/src/pages/ShortForm/Wizard.jsx b/frontend/src/pages/ShortForm/Wizard.jsx index 7b2569d2..3f2d2bd3 100644 --- a/frontend/src/pages/ShortForm/Wizard.jsx +++ b/frontend/src/pages/ShortForm/Wizard.jsx @@ -29,8 +29,23 @@ const INITIAL = { const STORAGE_KEY = 'openshorts.shortForm.wizard'; +// File objects can't be JSON-serialized; after a reload the `files` +// array would carry plain {name,size,...} stubs instead of real Files +// and any forward step would fail. Detect that and force the wizard +// back to Upload before the user sees stale state. 
+function shortFormNeedsFreshUpload(data) { + return Array.isArray(data?.files) + && data.files.length > 0 + && data.files.some((f) => !(f?.file instanceof File)); +} + export default function Wizard() { - const w = useWizard({ steps: STEPS, initialData: INITIAL, storageKey: STORAGE_KEY }); + const w = useWizard({ + steps: STEPS, + initialData: INITIAL, + storageKey: STORAGE_KEY, + resetOnRehydrate: shortFormNeedsFreshUpload, + }); return ( <div className="h-full flex flex-col"> diff --git a/frontend/src/pages/ShortForm/steps/Processing.jsx b/frontend/src/pages/ShortForm/steps/Processing.jsx index 44cd7213..d8a53f5d 100644 --- a/frontend/src/pages/ShortForm/steps/Processing.jsx +++ b/frontend/src/pages/ShortForm/steps/Processing.jsx @@ -17,7 +17,7 @@ import SnakeGame from '../../../components/ui/SnakeGame.jsx'; const POLL_MS = 2000; const HISTORY_KEY = 'openshorts.shortForm.history'; -async function startJob({ file, geminiKey }) { +async function startJob({ file, geminiKey, signal }) { const formData = new FormData(); formData.append('file', file); formData.append('acknowledged', 'true'); @@ -25,13 +25,14 @@ async function startJob({ file, geminiKey }) { method: 'POST', headers: { 'X-Gemini-Key': geminiKey }, body: formData, + signal, }); if (!res.ok) throw new Error(await res.text()); return res.json(); } -async function fetchStatus(jobId) { - const res = await fetch(getApiUrl(`/api/status/${jobId}`)); +async function fetchStatus(jobId, signal) { + const res = await fetch(getApiUrl(`/api/status/${jobId}`), { signal }); if (!res.ok) throw new Error(await res.text()); return res.json(); } @@ -88,38 +89,59 @@ export default function Processing({ wizard }) { // eslint-disable-next-line react-hooks/exhaustive-deps }, []); - // Poll status for every still-running job. + // Poll status for every still-running job. Cleanup aborts in-flight + // fetches and a `cancelled` flag stops late responses from committing + // after unmount or after the effect re-runs. 
The terminal-status guard + // in the setData updater drops stale 'processing' responses that race + // past a newer 'complete'/'error' response. useEffect(() => { const active = Object.entries(jobs).filter(([, j]) => j.jobId && j.status === 'processing'); if (active.length === 0) return; + let cancelled = false; + const controller = new AbortController(); const id = setInterval(async () => { + if (cancelled) return; for (const [fileId, j] of active) { try { - const data = await fetchStatus(j.jobId); - wizard.setData((prev) => ({ - ...prev, - jobs: { - ...prev.jobs, - [fileId]: { - ...prev.jobs[fileId], - status: data.status || prev.jobs[fileId].status, - logs: data.logs || prev.jobs[fileId].logs, - result: data.results || prev.jobs[fileId].result, + const data = await fetchStatus(j.jobId, controller.signal); + if (cancelled) return; + wizard.setData((prev) => { + const cur = prev.jobs[fileId]; + if (cur?.status === 'complete' || cur?.status === 'error') return prev; + return { + ...prev, + jobs: { + ...prev.jobs, + [fileId]: { + ...cur, + status: data.status || cur.status, + logs: data.logs || cur.logs, + result: data.results || cur.result, + }, }, - }, - })); + }; + }); } catch (e) { - wizard.setData((prev) => ({ - ...prev, - jobs: { - ...prev.jobs, - [fileId]: { ...prev.jobs[fileId], status: 'error', logs: [...(prev.jobs[fileId].logs || []), String(e.message || e)] }, - }, - })); + if (e?.name === 'AbortError' || cancelled) return; + wizard.setData((prev) => { + const cur = prev.jobs[fileId]; + if (cur?.status === 'complete' || cur?.status === 'error') return prev; + return { + ...prev, + jobs: { + ...prev.jobs, + [fileId]: { ...cur, status: 'error', logs: [...(cur?.logs || []), String(e.message || e)] }, + }, + }; + }); } } }, POLL_MS); - return () => clearInterval(id); + return () => { + cancelled = true; + controller.abort(); + clearInterval(id); + }; // Re-subscribe whenever the set of active job statuses changes. 
// eslint-disable-next-line react-hooks/exhaustive-deps }, [Object.values(jobs).map((j) => j.status).join(',')]); From 93f59074d6dc0b8139a07d04565618b1ee35a96f Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Tue, 19 May 2026 22:31:59 -0400 Subject: [PATCH 26/43] docs(roadmap): add product roadmap + smoke-test follow-ups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restructures ROADMAP.md into two top-level sections: 1. Product roadmap — user-facing feature backlog, tiered as Shipped / Stubbed-in-v1 / Later. Each Stubbed item names the backend TODO that unblocks full functionality so the wiring map is unambiguous. Covers Short-form, Long-form, Clip Generator, Dashboard, Settings, and the Notifications system. 2. Technical roadmap — unchanged content for Features A/B/C designs and the deferred-refactor table, kept under a clearly marked heading. The frontend-restructure row in the deferred table is updated to "superseded by the 4-phase UI overhaul". Adds a Follow-ups section capturing what the smoke-test pass surfaced but didn't ship: - Backend security-baseline gaps for POST /api/process (C2 rate limit, C4 timeout/breaker, C7 idempotency, C10 abuse cap) - Three frontend polish items (Dashboard caption mismatch, Skip/Review both disabled on all-error, useRef tidy-up) - Two infra gotchas (Docker /app/node_modules anonymous volume, OpenAPI snapshot Pydantic version drift) - Codex re-run note (task-mpdeyzjz-vpdetv reference for the baseline audit that produced the H1/H2/M3 list) Updates the "What landed" log with the most recent commits (brand kit, 4-phase UI overhaul, smoke-test fix). 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- ROADMAP.md | 291 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 261 insertions(+), 30 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index d07b080e..63e10493 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -1,17 +1,239 @@ # ROADMAP -Designs and ordering for the three future features the user asked about -during the restructure planning, plus the refactors deliberately deferred -out of the restructure phase so it could ship safely. +Two sections: + +1. **Product roadmap** — user-facing feature backlog. Honest about what's shipped vs stubbed. +2. **Technical roadmap** — designs for the three larger backend features (motion graphics, soundtracks, layouts) and the deferred restructure work. + +--- + +## Product roadmap + +Tiering: + +- **Shipped** — verified end-to-end in a browser smoke test on `chore/restructure-and-docs`. +- **Stubbed in v1** — UI is in place; the backend feature is a no-op, placeholder, or partial loop. Each item lists the backend TODO that unblocks it. +- **Later** — not started. 
+ +### Short-form wizard + +**Shipped** +- 4-step wizard (Upload → Categorize → Processing → Review) +- Batch upload up to 5 clips, MP4 / MOV ≤ 2 GB +- Server-side MP4/MOV signature validation (extension + `ftyp` magic bytes at byte offset 4) +- Wizard auto-resets to Upload on rehydrate when File handle is lost (no more stranded Categorize/Processing state) +- Real-time per-clip progress polling with abort-on-unmount + terminal-status guard against stale responses +- Mini Snake game during processing +- Split-view review: clip list + phone preview + Before/After toggle +- Download / Publish / Schedule buttons (UI; backend gaps below) +- Processing history tab +- Info tooltips on every major control + +**Stubbed in v1** +- "AI auto-categorization per clip" — defaults are pre-selected; real classification is `POST /api/categorize` (backend TODO #2) +- "Auto color grading" — toggle exists; LUT application is backend TODO #5 +- "Silence and dead-air removal" — toggle exists; `silencedetect` integration is backend TODO #4 +- "Face-focus layout" — toggle exists; per-category pipeline branches are backend TODO #3 +- "Batch endpoint" — wizard fires one `POST /api/process` per file in parallel; real `POST /api/process/batch` is backend TODO #1 +- "Send to CapCut export format" — UI-only placeholder +- "Publish directly to connected platform" — `/api/social/post` is synchronous today; bell notification can't advance past `submitted` until backend TODO #9 lands (`publish_jobs` queue + `GET /api/social/publish/status/{publish_id}`, mirroring the thumbnail flow at `backend/app/main.py:1565-1620`) +- "Schedule upload for a specific date/time" — same as Publish; UI fires the notification but no scheduler runs the eventual publish + +**Later** +- Auto layout detection (screen-share → 16:9 + face-cam below; face-only → full 9:16) +- Face-cam position configuration (corners) +- True per-clip settings cards (today the auto-edit toggles apply to the whole batch) +- Re-upload an 
externally edited CapCut export +- AI learns talking-head style from past content +- Confidence score per AI edit decision +- Auto-detect / highlight viral hook moments within a clip +- B-roll auto-insertion suggestions +- Auto zoom / punch-in on face at key moments +- Background music auto-suggestion + fade +- Auto intro / outro from brand-kit templates +- Clip re-ordering within a batch before export +- Duplicate clip with different settings (A/B export) +- Platform-specific export presets (TikTok 9:16, Reels, Shorts, Snap) +- Per-platform subtitle style overrides +- Auto-generate social-media caption text (separate from burn-in subtitles) +- Thumbnail auto-generation per clip +- Viral score prediction per clip +- Compare two clips side by side in review +- Bulk download / bulk schedule for a batch +- Favorites / pins / tags / folders +- Central media library across all content types +- Filter library by platform / type / date / campaign + +### Long-form wizard + +**Shipped** +- 4-step wizard (Upload → Settings → Processing → Editor) +- Single MP4 / MOV upload ≤ 8 GB (4K supported) +- Simulated 5-stage processing progress (StrictMode-safe: timer survives the dev-mode double-mount) +- Editor: video preview + chapter timeline scrubber + Chapters / Subtitles / Export tabs +- Inline chapter title rename +- "Export segment as short" modal (UI wired; backend route still TODO) +- Mini Snake game during processing +- Wizard auto-resets to Upload on rehydrate when File handle is lost +- Processing history tab +- Info tooltips + +**Stubbed in v1** +- "AI auto-chapter detection" — Editor seeds 3 placeholder chapters (Intro / Main / Outro); real PySceneDetect-driven chapters are backend TODO #6 +- "Auto-generate YouTube description, tags, chapter timestamps" — toggle exists; no backend code wired (TODO #6+) +- "Subtitle panel: click any line to edit" — read-only panel today; edit → re-render pipeline isn't wired +- "Color grade" / "Intro / outro" toggles — backend TODOs #5 / 
#8 +- "Export segment as short" backend route — `POST /api/long-form/export-segment` is backend TODO #7 + +**Later** +- Jump cut + filler-word removal (um, uh, like) +- Auto intro / outro from brand kit +- Separate LUT for long-form +- Color-grade matching with short-form +- AI show notes / blog post from transcript +- Multi-track audio editing (voice vs background) +- B-roll markers + suggestions on the timeline +- AI highlight-reel generator (best 2–3 min auto-extracted) +- Transcript-driven cuts (edit words → edits video) +- Drag-and-drop chapter reordering +- Auto thumbnail with face detection (best-frame picker) +- Engagement heat-map overlay (predicted drop-off) +- Direct YouTube Studio integration (push title, description, thumbnail, chapters in one click) +- Long-form → multi-shorts auto-pipeline (5 shorts from one source) +- Speaker labelling (multi-person) +- Subtitle export as `.srt` (separate file alongside burn-in) + +### Clip Generator (original `/api/process` flow) + +**Shipped** +- Upload a long-form video file OR paste a YouTube URL +- AI extracts top viral moments (Gemini 2.5 Flash, 3–15 clips per video) +- Preview + download extracted clips +- Subtitle, Hook, Translate, Render modals on each result card +- Subprocess command fixed (`python -u -m app.cli`; the pre-restructure `python -u main.py` was broken on this branch and surfaced by the smoke test) + +**Later** +- Batch-extract from multiple YouTube URLs at once +- Filter extracted clips by length (15s / 30s / 60s) +- Inline trim / subtitle edit before download +- Auto-rank clips by predicted virality +- Push extracted clips directly into the short-form wizard + +### Dashboard + +**Shipped** +- 3 StatCards (clips processed / scheduled / published) — counters derive from history + notifications stores +- "Upcoming uploads" panel (filters the notification feed) +- "Recent activity" panel + +**Stubbed in v1** +- Live backend feed for the StatCards — today everything derives from localStorage; the 
real source is `GET /api/clips/recent?limit=20` (backend TODO #10) + +**Later** +- Per-platform analytics (views, watch time, follower growth, engagement rate) +- "Best-performing clip of the week" panel +- Posting-consistency / streak tracker +- Recommended posting times per platform +- Quick-upload shortcut from the dashboard +- Revenue / monetization tracker (YouTube, TikTok creator fund) + +### Settings + +**Shipped** +- Brand Kit editor — colors, font, per-aspect layout, 3×3 text-position grid, live preview cycling +- Brand-kit font upload (`assets/fonts/user/`, volume-mounted, persisted across restarts) +- API Keys page (Gemini / Upload-Post / ElevenLabs / fal.ai) +- VS-Code 180 px section nav (General / Platforms / System) + +**Stubbed in v1** (placeholder section pages render but expose no editable controls yet) +- Subtitle style (separate from Brand Kit) +- Color presets +- Export defaults +- Per-platform settings (YouTube / TikTok / Instagram / Snapchat / Facebook) +- Processing history + +**Later** +- Multiple brand-kit profiles (per channel / brand) +- Team / multi-user with role permissions +- Template system (save full settings as a named template, apply in one click) +- Light / dark mode toggle +- White-label custom domain +- Webhook + Zapier / Make integrations +- Storage / usage stats (storage used, clips processed this month, API calls) +- Email + push notifications for processing-complete / upload-confirmed events + +### Notifications system + +**Shipped** +- Bell icon in the header with unread dot +- `pushNotification(...)` from publish / schedule actions in `ResultCard.jsx` + `ScheduleWeekModal.jsx` +- Dropdown with mark-all-read + clear +- Codex audit cleared the path of DOM/XSS concerns: untrusted text renders as React text nodes, not HTML + +**Stubbed in v1** +- The bell terminates at `submitted` / `scheduled` because publish is synchronous. The async upgrade is backend TODO #9 (see Short-form Publish above). 
Once it lands, the bell can advance items through `submitted → published` and `submitted → error`. + +**Later** +- Processing-complete notification +- Scheduling reminder (upcoming scheduled post) +- Platform error alerts (failed upload) +- Browser / mobile push +- Email + +--- + +## Follow-ups from the smoke-test pass + +Captured during the post-Phase-4 manual browser smoke test on +`chore/restructure-and-docs` (commit `43c2d96`). Each item maps to an +exact file:line so the next agent doesn't have to re-find it. + +### Backend security baseline for `POST /api/process` (STATE-MUTATING) + +The smoke-test commit landed C3 (input validation) and re-confirmed C8 +(concurrency lock via the job-queue semaphore) and C9 (attestation log). +Four controls remain outstanding per the `security_baseline:` block — +a `/gsd-secure-phase` pass should land them: + +- **C2 — Rate limit.** Per-IP and per-key caps. `MAX_CONCURRENT_JOBS` is process-wide, not per-caller. +- **C4 — Timeout / breaker.** `run_job` spawns a Python subprocess with no timeout. A 15-min hard cap + breaker on repeated subprocess crashes (e.g. yt-dlp 403s) would prevent zombie jobs. +- **C7 — Idempotency.** Accept `Idempotency-Key` header; dedup window keyed on `(api_key_fingerprint, file_sha256 OR url)` for ~5 min. +- **C10 — Abuse / cost cap.** BYOK means cost lands on the user, so the host-side concern is volume — burst-rate kill switch + per-IP/day quota. + +### Frontend polish + +- **`Dashboard.jsx`** — the "CLIPS PROCESSED" StatCard shows the count but the sub-caption still reads "No batches yet" even when the count is non-zero. The caption is a separate field; threshold-derived sub-copy (`count > 0 ? '{n} batches' : 'No batches yet'`) is the fix. +- **`ShortForm/steps/Processing.jsx:189-203`** — when every job ends in `error`, both **Skip** and **Review** are disabled (Skip gates on `hasAnyComplete`; Review on `overallStatus === 'complete'`). The user has no forward path.
Either let Review unlock when *all* jobs reach a terminal status (including all-error), or add a "Start over" link. +- **`LongForm/steps/Processing.jsx`** — the import of `useRef` is still in place (used by `savedRef`); harmless, but worth a tidy if/when the `savedRef` pattern itself gets replaced by a status check. + +### Infra / tooling + +- **Docker Compose anonymous volume gotcha.** `/app/node_modules` in `docker-compose.yml` masks freshly-installed npm deps after a `package.json` change — `react-router-dom` was missing in the container on first smoke-test run despite being in `package.json`. Fix is either (a) document `docker compose down -v && docker compose up --build` in the README, or (b) drop the anonymous volume from the `frontend` service and accept slower first builds. +- **OpenAPI snapshot drift.** `backend/tests/api/test_openapi_contract.py` fails 1/62 in the current docker image — Pydantic emits `contentMediaType: application/octet-stream` for file-upload fields where the baseline has `format: binary`. No route changes; pure schema-serialization drift. Regenerate per the procedure in `HANDOFF.md §12`, or pin Pydantic to the baseline-generation version. +- **`assets/fonts/user/` mount.** Already persistent across restarts, but there's no UI to *delete* an uploaded font yet. + +### Adversarial review re-run + +The H1 / H2 / M3 remediations in `43c2d96` are based on the read-only Codex +audit (task `task-mpdeyzjz-vpdetv`, completed 2026-05-20 02:01 UTC). +A follow-up `/codex:adversarial-review` before merge is worth running — +both to verify the fixes land cleanly and to surface anything the first +pass deferred. + +--- + +## Technical roadmap + +Designs and ordering for the three larger backend features the user asked +about during the restructure planning, plus the refactors deliberately +deferred out of the restructure phase so it could ship safely. 
The headline rule: **everything below depends on the package structure that already shipped in Phase 1, plus the single-FFmpeg-wrapper convention.** Each feature is sized so that it can land in a small handful of atomic commits with the `pytest -m "not e2e"` suite green between commits. ---- - -## Ordering (lowest blast radius first) +### Ordering (lowest blast radius first) 1. **Feature C — Motion Graphics Library.** Reuses the proven FFmpeg-overlay pattern from `openshorts/overlays/hooks.py`. No changes @@ -38,9 +260,9 @@ The three deferred refactors interleave naturally: --- -## Feature C — Motion Graphics Library +### Feature C — Motion Graphics Library -### Why first +#### Why first The hook-overlay code in `openshorts/overlays/hooks.py:add_hook_to_video()` already proves out the pattern: render PNG via PIL, burn onto video via @@ -48,7 +270,7 @@ FFmpeg `overlay` filter. Generalizing that to "a library of effects, each rendered to a PNG sequence or alpha .mov, then composited in one ffmpeg invocation" is a small extension. No changes to the per-frame loop. -### Architecture +#### Architecture ``` openshorts/motion_graphics/ @@ -72,7 +294,7 @@ openshorts/motion_graphics/ └── animated_emoji.py class AnimatedEmojiEffect ``` -### Files to add +#### Files to add - `openshorts/motion_graphics/base.py` - `openshorts/motion_graphics/compositor.py` @@ -81,7 +303,7 @@ openshorts/motion_graphics/ - `openshorts/models/motion_graphics.py` — Pydantic schemas (`EffectInstance`, `RenderTimeline`, etc.) - Frontend: a `MotionGraphicsModal.jsx` matching the existing `HookModal` / `SubtitleModal` pattern (defer until UI work is in scope) -### Integration +#### Integration The compositor sits *after* the vertical-reframing step and *before* the audio mux in `openshorts/video/pipeline.py`. 
Easiest way to wire it in @@ -89,7 +311,7 @@ is to make `process_video_to_vertical()` accept an optional `motion_graphics_timeline` argument and, if present, route the silent-video output through the compositor before the audio merge. -### Risks the pipeline analysis flagged +#### Risks the pipeline analysis flagged - **Re-encoding per overlay.** Mitigated by the compositor building a single `filter_complex` chain — the video is decoded and re-encoded @@ -99,14 +321,14 @@ silent-video output through the compositor before the audio merge. --- -## Feature A — Background Soundtracks + SFX with Ducking +### Feature A — Background Soundtracks + SFX with Ducking -### Why second +#### Why second Logically independent of layouts. Needs the FFmpeg wrapper done so the mixer can compose `amix` + `volume` + `silencedetect` chains cleanly. -### Architecture +#### Architecture ``` openshorts/audio/ @@ -128,7 +350,7 @@ openshorts/audio/ # Prompt lives at openshorts/prompts/sfx_cues.md. ``` -### Files to add +#### Files to add - `openshorts/audio/mixer.py` - `openshorts/audio/library.py` @@ -138,7 +360,7 @@ openshorts/audio/ - `openshorts/models/audio.py` - `assets/music/manifest.json` + a small set of CC-licensed tracks (or stub manifest + user uploads in v1) -### Integration +#### Integration Inside `openshorts/video/pipeline.py:process_video_to_vertical()` at the existing audio-mux step (today around the `merge_command` block). The @@ -146,7 +368,7 @@ audio mixer takes the original audio from `temp_audio_output`, mixes in the soundtrack + cues, and writes the mixed audio back over the intermediate file before the final mux. The video side never sees this. -### Risks +#### Risks - **Speech-detection accuracy.** When word timings are unreliable (background noise, music in the source), fall back to FFmpeg @@ -157,16 +379,16 @@ intermediate file before the final mux. The video side never sees this. 
--- -## Feature B — Layout Templates +### Feature B — Layout Templates -### Why last +#### Why last Touches the per-frame loop in `openshorts/video/pipeline.py`. The other two features add new boxes alongside the loop; this one rewrites how the loop branches. Biggest blast radius — best to land it after C and A are shipped and the test suite has shaken out any edge cases. -### Architecture +#### Architecture ``` openshorts/layouts/ @@ -181,13 +403,13 @@ openshorts/layouts/ └── side_by_side.py class SideBySideLayout # stub for the next variant ``` -### Files to add +#### Files to add - `openshorts/layouts/base.py`, `vertical_panorama.py`, `educational.py`, `side_by_side.py` - `openshorts/routes/layouts.py` — `layout` field accepted on `POST /api/process`; later `POST /api/layout/reapply` to swap layout on an existing job's clips without re-transcribing - `openshorts/models/layouts.py` -### Pipeline change +#### Pipeline change The branching at the heart of `process_video_to_vertical()` (the `if current_strategy == 'GENERAL': ... else: ...` block) becomes: @@ -210,7 +432,7 @@ At each frame, both crops are computed and stacked vertically. If no face is detected for the presenter slot, falls back to vertical panorama for that segment. -### Risks +#### Risks - **Per-frame cost.** Two cameramen + two crops doubles the detection / transform cost. Mitigation: detect once per frame; both @@ -228,17 +450,26 @@ for that segment. | Migrate every `subprocess.run(['ffmpeg', ...])` to `openshorts/video/ffmpeg.py` | Many call sites (app.py, video/pipeline.py, overlays/*, editing/ai_filters.py, saas/pipeline.py). Migrating all of them in one pass would have ballooned the restructure commit set. | One caller per commit. Tests between. The hook overlay in `overlays/hooks.py:add_hook_to_video()` is a good first migration — small, well-tested. | | Internal split of `openshorts/saas/pipeline.py` | 1474-line file. No direct test coverage (only via the OpenAPI contract). 
Splitting it carries risk without the safety net of tests. | Per the original plan: `saas/research.py` (scraping + analyze), `saas/scripting.py`, `saas/media.py` (fal.ai + ElevenLabs TTS), `saas/compositing.py`, `saas/pipeline.py` (orchestrator). Add focused unit tests for the research + scripting + compositing layers as you split them. | | `openshorts/core/job_store.py` + `api_keys.py` resolver | Today the job-state dicts (`jobs`, `thumbnail_sessions`, `publish_jobs`, `saas_jobs`) live as globals in `app.py`. The router split is a natural place to extract them. | Land alongside the router split, not before — extracting them prematurely just shifts where the globals live without delivering value. | -| Frontend restructure | Explicitly out of scope per the planning Q&A — frontend changes are deferred to a separate round. | When the user is ready: split `dashboard/src/App.jsx` along the same modal-per-feature axes as the backend routes, and introduce a centralized api client. | +| Frontend restructure | Done in the 4-phase UI overhaul (commits 667a88e → 95ca831): `App.jsx` is now 47 lines, state lives in `frontend/src/state/`, the wizards / shell / Settings VS-Code layout are all in place. Remaining frontend work is feature-level, not structural. | n/a — superseded. | --- ## What landed in this restructure -For posterity. Phase 0 + Phase 1 + Phases 2-5 produced these commits on -`chore/restructure-and-docs` (newest first): - -- `docs(claude.md): add per-folder sub-CLAUDE.md stubs` — five `CLAUDE.md` files at directory boundaries. -- `docs(claude.md): rewrite with structured guidance + auto-managed sections` — the new CLAUDE.md. +For posterity. 
Phase 0 + Phase 1 + Phases 2-5 + the 4-phase UI overhaul + +the smoke-test fix commit produced these on `chore/restructure-and-docs` +(newest first): + +- `fix(smoke-test): runtime bugs + Codex H1/H2/M3 remediation` — backend `run_job` subprocess + LongForm StrictMode timer + JSX `>` warnings + Codex H1 (MP4/MOV signature validation) + H2 (wizard reset on File loss) + M3 (polling AbortController + terminal-status guard). +- `feat(ui): phase 4 — long-form 4-step wizard + Dashboard`. +- `feat(ui): phase 3 — short-form 4-step wizard + UI primitives`. +- `feat(ui): phase 2 — Settings VS-Code layout + notifications + tooltips`. +- `feat(ui): phase 1 — shell + theme + routing skeleton`. +- `feat(brand-kit): brand kit settings + font upload + port refresh`. +- `chore(restructure): split repo into backend/ + frontend/ + renderer/ + assets/`. +- `docs(roadmap): design future features + document deferred refactors`. +- `docs(claude.md): add per-folder sub-CLAUDE.md stubs for high-rule areas`. +- `docs(claude.md): rewrite with structured guidance + auto-managed sections`. - `chore(tooling): add CLAUDE.md auto-updater + pre-commit hook`. - `docs(env): expand .env.example to match what the code actually reads`. - `chore(restructure): Dockerfile CMD points at openshorts.app:app`. From 7d073cbccef7a9ebfed1b4cbbb3ac8bf440fdb1e Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Wed, 20 May 2026 09:39:48 -0400 Subject: [PATCH 27/43] fix(short-form): normalize backend job status + result key MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surfaced by the first real Gemini-key smoke test. The short-form Processing step polled `/api/status/{job_id}` and trusted the keys verbatim, but the backend contract speaks a different vocab than the wizard: backend wizard expected ---- ---- status=completed status=complete status=failed status=error result=... result=... 
(wizard was reading data.results) So a successful job never tripped the overallStatus → 'complete' transition (Review button stayed disabled) and the clip metadata never reached `j.result` (Review step would have shown an empty list). The legacy useJobPolling.js already mapped both at the boundary — this mirrors that here. Added `normalizeJobPayload(data)` next to `fetchStatus` so the rest of the component stays in the wizard's vocab. Verified end-to-end on the demo MP4: Gemini returns 1 viral clip, wizard auto-advances to Review, PhoneFrame renders the 17 s vertical output with title, description, Download, Publish×5, Schedule×5 buttons. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- .../src/pages/ShortForm/steps/Processing.jsx | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/frontend/src/pages/ShortForm/steps/Processing.jsx b/frontend/src/pages/ShortForm/steps/Processing.jsx index d8a53f5d..5bd1bf90 100644 --- a/frontend/src/pages/ShortForm/steps/Processing.jsx +++ b/frontend/src/pages/ShortForm/steps/Processing.jsx @@ -37,6 +37,18 @@ async function fetchStatus(jobId, signal) { return res.json(); } +// Backend vocab is `queued | processing | completed | failed` and the +// payload uses `result` (singular). The wizard's done check + StatusIcon +// expect `complete | error` and a `result` key. Normalize once at the +// boundary so the rest of the component speaks the wizard's vocab. +function normalizeJobPayload(data) { + const status = + data.status === 'completed' ? 'complete' : + data.status === 'failed' ?
'error' : + data.status; + return { status, logs: data.logs, result: data.result }; +} + function saveHistory(entry) { try { const raw = localStorage.getItem(HISTORY_KEY); @@ -105,6 +117,7 @@ export default function Processing({ wizard }) { try { const data = await fetchStatus(j.jobId, controller.signal); if (cancelled) return; + const norm = normalizeJobPayload(data); wizard.setData((prev) => { const cur = prev.jobs[fileId]; if (cur?.status === 'complete' || cur?.status === 'error') return prev; @@ -114,9 +127,9 @@ export default function Processing({ wizard }) { ...prev.jobs, [fileId]: { ...cur, - status: data.status || cur.status, - logs: data.logs || cur.logs, - result: data.results || cur.result, + status: norm.status || cur.status, + logs: norm.logs || cur.logs, + result: norm.result || cur.result, }, }, }; From fce9773e87213ebe806a8fb561f7a8f5e4e6b650 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Wed, 20 May 2026 10:26:03 -0400 Subject: [PATCH 28/43] chore(docs): consolidate project CLAUDE.md into ~/.claude/CLAUDE.md Project-specific guidance now lives under a "## OpenShorts (project-specific)" H2 section in the user's global CLAUDE.md. scripts/update_claude_md.py is retargeted to that file (overridable via OPENSHORTS_CLAUDE_MD) and remains idempotent; the project-root CLAUDE.md is removed. Trade-off: the OpenShorts module map + env table is now loaded by Claude in every project session, not just this repo's. Re-pointable later via the env var. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- CLAUDE.md | 235 ------------------------------------ scripts/update_claude_md.py | 23 ++-- 2 files changed, 16 insertions(+), 242 deletions(-) delete mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index 78566c83..00000000 --- a/CLAUDE.md +++ /dev/null @@ -1,235 +0,0 @@ -# CLAUDE.md - -Guidance for Claude Code (and humans) when working with the OpenShorts repo. 
- -## Project - -OpenShorts is an AI-powered vertical short-video generator. It transforms -YouTube videos and local uploads into 9:16 viral clips for TikTok, Reels, -and Shorts. The pipeline uses Google Gemini for viral-moment detection and -title generation, faster-whisper for transcription, PySceneDetect for scene -boundaries, MediaPipe + YOLOv8 for face/person tracking, and FFmpeg for all -encoding/overlay/mux work. - -## Top-level layout - -``` -openshorts/ -├── backend/ # 🐍 Python FastAPI — API, video pipeline, tests -├── frontend/ # ⚛️ React + Vite — the dashboard UI -├── renderer/ # 🎬 Remotion service (TypeScript) + compositions -├── assets/ # 🖼️ Committed static files (fonts, screenshots) -├── scripts/ # 🛠️ Dev tooling (CLAUDE.md auto-updater, hook installer) -└── docker-compose.yml -``` - -Each top-level folder is self-contained: `backend/` has its own `Dockerfile` and Python deps, `frontend/` has its own `package.json`, `renderer/` bundles its own TypeScript. Docker Compose orchestrates all three. - -## Quick start - -```bash -# Full stack (recommended) -docker compose up --build -# Frontend → http://localhost:3001 -# Backend → http://localhost:3002 -# Renderer → http://localhost:3003 - -# Backend only (local dev — needs Python 3.11+ and FFmpeg on PATH) -cd backend -pip install -r requirements.txt -r requirements-dev.txt -pip install -e . 
-pytest -m "not e2e" # unit + API contract suite (~0.6s) -uvicorn app.main:app --host 0.0.0.0 --port 3002 - -# Frontend only -cd frontend && npm install && npm run dev -``` - -Install the CLAUDE.md auto-updater hook once after cloning: - -```bash -bash scripts/install_hooks.sh -``` - -## Where things go (decision table) - -When you want to **add** something, this is where it lands: - -| If you want to add… | Drop it in… | Notes | -| --- | --- | --- | -| A new HTTP endpoint | `backend/app/routes/<domain>.py` + register in `backend/app/main.py` | The full router split from `main.py` is in flight; until it ships, edit `backend/app/main.py` directly. | -| A new FFmpeg operation | `backend/app/video/ffmpeg.py` | Never call `subprocess.run(['ffmpeg', ...])` outside this module. | -| A new external service client | `backend/app/integrations/<service>.py` | Each one exposes a typed Python client. | -| A new AI model / inference call | `backend/app/ml/<purpose>.py` | Detection, transcription, viral extraction, etc. | -| A new layout template | `backend/app/layouts/<name>.py` | Subclass `Layout` (see [ROADMAP.md](ROADMAP.md) feature B). | -| A new motion-graphic effect | `backend/app/motion_graphics/library/<name>.py` | Subclass `MotionGraphicEffect` (see [ROADMAP.md](ROADMAP.md) feature C). | -| A new audio mixer / SFX | `backend/app/audio/<concern>.py` | See [ROADMAP.md](ROADMAP.md) feature A. | -| A new Gemini prompt | `backend/app/prompts/<name>.md` or `backend/app/editing/prompts.py` | Externalize prompts; don't bury them in handler code. | -| A new Pydantic schema | `backend/app/models/<domain>.py` | One file per request/response domain. | -| A new shared FFmpeg / filter helper | `backend/app/utils/filters.py` | Already used by editing + future motion-graphics compositor. | -| A new core infrastructure piece | `backend/app/core/<concern>.py` | Job queue, job store, api-key resolver, logging. 
| -| A new frontend page / component | `frontend/src/components/<Name>.jsx` | Match existing modal/card naming. | -| A new Remotion composition | `renderer/compositions/src/` | Service auto-bundles compositions in this folder. | - -When you want to **remove** something: - -1. Delete the route file (or the function within it). -2. `grep -rn <removed_name> backend/` to find dead imports. -3. Delete the corresponding Pydantic model in `backend/app/models/` if any. -4. Delete or update tests that reference it. -5. Run `python scripts/update_claude_md.py` (the pre-commit hook will do this for you). - -## Repo layout - -The top-level folders. **The table below is auto-managed by `scripts/update_claude_md.py`** — never edit it by hand. - -<!-- AUTO:REPO-MAP:START --> -| Folder | What it is | -| --- | --- | -| `assets/` | Committed static assets (fonts, screenshots). | -| `backend/` | Python FastAPI service — the API, video pipeline, and tests. | -| `frontend/` | React + Vite dashboard — the UI users interact with. | -| `output/` | Runtime: generated clips and thumbnails (gitignored). | -| `renderer/` | Remotion render microservice (TypeScript) + compositions. | -| `scripts/` | Developer tooling (update_claude_md.py, install_hooks.sh). | -| `uploads/` | Runtime: incoming video uploads (gitignored). | -<!-- AUTO:REPO-MAP:END --> - -### Backend package (`backend/app/`) - -The Python package follows classical layered conventions. Each subfolder -has a one-line purpose statement in its `__init__.py`. - -| Folder | Rule | -| --- | --- | -| `backend/app/core/` | Cross-cutting infra: job queue, job store, API-key resolver, logging. | -| `backend/app/routes/` | FastAPI routers, one module per API domain. | -| `backend/app/video/` | All video work goes here. **FFmpeg only via `video/ffmpeg.py`.** | -| `backend/app/ml/` | AI inference: face/person detection, transcription, viral extraction. | -| `backend/app/audio/` | Future feature A — soundtracks + ducking. 
| -| `backend/app/layouts/` | Future feature B — layout templates (panorama, educational, etc.). | -| `backend/app/motion_graphics/` | Future feature C — animated overlays + multi-effect compositor. | -| `backend/app/editing/` | AI-generated FFmpeg filter pipeline. | -| `backend/app/overlays/` | Hook cards + subtitle generation / burn-in. | -| `backend/app/ingest/` | YouTube downloads + local upload handling. | -| `backend/app/saas/` | SaaSShorts UGC pipeline (research → script → media → composite). | -| `backend/app/integrations/` | External-service clients (S3, ElevenLabs, fal.ai, Upload-Post). | -| `backend/app/thumbnails/` | YouTube thumbnail workflow (titles, images, descriptions). | -| `backend/app/prompts/` | Externalized Gemini prompt templates. | -| `backend/app/models/` | Pydantic request/response schemas grouped by domain. | -| `backend/app/utils/` | Shared helpers: filter sanitization, path utilities. | - -## Module map - -Every Python module under `backend/app/` and its public surface. **Auto-managed** — regenerated by the pre-commit hook from each file's docstring. - -<!-- AUTO:MODULE-MAP:START --> -| Module | Purpose | Public surface | -| --- | --- | --- | -| `backend/app/cli.py` | Compat shim + CLI entrypoint. | _(none)_ | -| `backend/app/editing/ai_filters.py` | VideoEditor: Gemini-driven FFmpeg filter generation and application. | `VideoEditor` | -| `backend/app/editing/prompts.py` | Gemini prompt templates for AI video-effect generation. | `build_ffmpeg_filter_prompt`, `build_effects_config_prompt` | -| `backend/app/ingest/youtube.py` | YouTube downloader with bot-detection workarounds (yt-dlp + cookies + alt clients). | `sanitize_filename`, `download_youtube_video` | -| `backend/app/integrations/elevenlabs.py` | ElevenLabs Dubbing API client: AI voice translation across 30+ languages. 
| `create_dubbing_project`, `get_dubbing_status`, `download_dubbed_video`, `translate_video`, `get_supported_languages` | -| `backend/app/integrations/s3.py` | AWS S3 client: clip uploads, actor gallery, UGC video gallery, presigned URLs. | `upload_file_to_s3`, `get_s3_client`, `generate_presigned_url`, `list_all_clips`, `upload_actor_to_s3`, `list_actor_gallery`, `upload_video_to_gallery`, `list_video_gallery`, `upload_job_artifacts` | -| `backend/app/main.py` | FastAPI application entrypoint: routes, job queue, and the wire-up of every backend feature. | `cleanup_jobs`, `process_queue`, `run_job_wrapper`, `lifespan`, `ProcessRequest`, `enqueue_output`, `run_job`, `get_config`, `process_endpoint`, `get_status`, `EditRequest`, `edit_clip`, `SubtitleRequest`, `get_clip_transcript`, `proxy_render`, `proxy_render_status`, `EffectsGenerateRequest`, `generate_effects_config`, `add_subtitles`, `HookRequest`, `add_hook`, `TranslateRequest`, `get_languages`, `translate_clip`, `SocialPostRequest`, `post_to_socials`, `get_social_user`, `thumbnail_upload`, `thumbnail_analyze`, `ThumbnailTitlesRequest`, `thumbnail_titles`, `thumbnail_generate`, `ThumbnailDescribeRequest`, `thumbnail_describe`, `thumbnail_publish`, `thumbnail_publish_status`, `SaaSAnalyzeRequest`, `saasshorts_analyze`, `SaaSActorRequest`, `saasshorts_actor_upload`, `saasshorts_actor_options`, `saasshorts_video_gallery`, `SaaSPostRequest`, `saasshorts_post_to_socials`, `gallery_html_page`, `video_html_page`, `saasshorts_actor_gallery`, `SaaSGenerateRequest`, `saasshorts_generate`, `saasshorts_status`, `saasshorts_voices` | -| `backend/app/ml/detection.py` | Face and person detection: MediaPipe BlazeFace (primary) + YOLOv8 (fallback). | `detect_face_candidates`, `detect_person_yolo` | -| `backend/app/ml/transcription.py` | faster-whisper transcription: CPU-optimized (INT8 quantization) with word timestamps. 
| `transcribe_video` | -| `backend/app/ml/viral_extraction.py` | Gemini 2.5 Flash viral-moment extraction: picks 3-15 short clips from a transcript. | `get_viral_clips` | -| `backend/app/overlays/hooks.py` | Hook text overlays: PIL-rendered cards (PNG) burned onto video via FFmpeg. | `download_font_if_needed`, `create_hook_image`, `add_hook_to_video` | -| `backend/app/overlays/subtitles_generate.py` | SRT subtitle generation: transcription and word-level grouping into short lines. | `transcribe_audio`, `generate_srt_from_video`, `generate_srt`, `format_srt_block` | -| `backend/app/overlays/subtitles_render.py` | Subtitle burn-in: FFmpeg subtitles filter + ASS color/style conversion. | `hex_to_ass_color`, `burn_subtitles` | -| `backend/app/saas/pipeline.py` | SaaSShorts: AI-powered UGC video generator for SaaS products. | `research_saas_online`, `scrape_website`, `analyze_saas`, `generate_scripts`, `generate_actor_images`, `generate_actor_image`, `generate_voiceover`, `get_elevenlabs_voices`, `generate_talking_head`, `generate_talking_head_lowcost`, `generate_broll`, `transcribe_audio_for_subs`, `generate_tiktok_subs`, `generate_srt_from_script`, `composite_video`, `generate_full_video` | -| `backend/app/thumbnails/descriptions.py` | YouTube description + chapter-marker generation from transcript segments. | `generate_youtube_description` | -| `backend/app/thumbnails/images.py` | Thumbnail image generation via Gemini multimodal image preview model. | `generate_thumbnail` | -| `backend/app/thumbnails/titles.py` | Gemini-driven viral title generation and conversational refinement. | `analyze_video_for_titles`, `refine_titles` | -| `backend/app/utils/filters.py` | Shared FFmpeg filter helpers: chain splitting, sanitization, zoompan size enforcement. | `split_filter_chain`, `enforce_zoompan_output_size`, `sanitize_filter_string` | -| `backend/app/video/ffmpeg.py` | Single FFmpeg wrapper for the entire codebase. 
| `FFmpegError`, `run`, `probe_resolution`, `probe_duration`, `cut`, `extract_audio`, `mux_video_audio`, `overlay_png`, `build_filter_complex` | -| `backend/app/video/pipeline.py` | process_video_to_vertical orchestrator: scenes -> strategy -> per-frame crop -> mux. | `process_video_to_vertical` | -| `backend/app/video/reframing.py` | Vertical reframing helpers: blurred-background 'General Shot' composite. | `create_general_frame` | -| `backend/app/video/scene_analysis.py` | PySceneDetect scene boundaries + per-scene TRACK/GENERAL strategy analysis. | `detect_scenes`, `get_video_resolution`, `analyze_scenes_strategy` | -| `backend/app/video/tracking.py` | SmoothedCameraman and SpeakerTracker: the heart of stabilized vertical reframing. | `SmoothedCameraman`, `SpeakerTracker` | -<!-- AUTO:MODULE-MAP:END --> - -## Processing pipeline - -1. **Ingest** — `backend/app/ingest/youtube.py:download_youtube_video()` or a local upload. -2. **Transcribe** — `backend/app/ml/transcription.py:transcribe_video()` (faster-whisper, word timestamps). -3. **Scene-detect** — `backend/app/video/scene_analysis.py:detect_scenes()` (PySceneDetect). -4. **Viral extraction** — `backend/app/ml/viral_extraction.py:get_viral_clips()` (Gemini 2.5 Flash picks 3–15 clips, 15–60 s each). -5. **Cut clips** — FFmpeg `-ss`/`-to` per clip. -6. **Strategy** — `backend/app/video/scene_analysis.py:analyze_scenes_strategy()` decides TRACK vs GENERAL per scene. -7. **Reframe** — `backend/app/video/pipeline.py:process_video_to_vertical()` runs the per-frame loop. -8. **Effects** (optional) — `backend/app/editing/ai_filters.py:VideoEditor` injects Gemini-generated FFmpeg filters. -9. **Hooks + subtitles** (optional) — `backend/app/overlays/`. -10. **Translate** (optional) — `backend/app/integrations/elevenlabs.py:translate_video()` dubs into 30+ languages. -11. **Backup + distribute** — `backend/app/integrations/s3.py` + `backend/app/integrations/upload_post.py` (planned). 
- -## API surface - -| Method | Route | Purpose | -| --- | --- | --- | -| POST | `/api/process` | Submit a video (URL or upload) for processing. | -| GET | `/api/status/{job_id}` | Poll status + logs. | -| POST | `/api/edit` | Apply Gemini-generated FFmpeg filters to a clip. | -| POST | `/api/effects/generate` | Get a structured EffectsConfig for Remotion. | -| POST | `/api/render/{render_id}` | Render via the Remotion microservice. | -| POST | `/api/subtitle` | Generate + burn subtitles. Auto-transcribes dubbed videos. | -| POST | `/api/hook` | Burn a text-hook PNG onto a clip. | -| POST | `/api/translate` | AI voice dubbing via ElevenLabs. | -| GET | `/api/translate/languages` | List supported languages. | -| POST | `/api/social/post` | Distribute via Upload-Post. | -| POST | `/api/thumbnail/*` | YouTube thumbnail workflow (titles, images, descriptions). | -| POST | `/api/saasshorts/*` | SaaS UGC pipeline. | - -The full route inventory (32 endpoints) is locked in `tests/snapshots/baseline.openapi.json`. - -## Environment - -Server-side env vars the code actually reads. **Auto-managed** — generated from `.env.example`. 
- -<!-- AUTO:ENV:START --> -| Variable | Default | Notes | -| --- | --- | --- | -| `GEMINI_API_KEY` | `_(empty — must set)_` | Required (server-side reads via os.getenv) | -| `AWS_ACCESS_KEY_ID` | `_(empty — must set)_` | Optional: AWS S3 (clip backup + public gallery) | -| `AWS_SECRET_ACCESS_KEY` | `_(empty — must set)_` | Optional: AWS S3 (clip backup + public gallery) | -| `AWS_REGION` | `eu-west-3` | Optional: AWS S3 (clip backup + public gallery) | -| `AWS_S3_BUCKET` | `_(empty — must set)_` | Optional: AWS S3 (clip backup + public gallery) | -| `AWS_S3_PUBLIC_BUCKET` | `_(empty — must set)_` | Optional: AWS S3 (clip backup + public gallery) | -| `DISABLE_YOUTUBE_URL` | `false` | Optional: YouTube ingestion | -| `YOUTUBE_COOKIES` | _(unset)_ | Optional: YouTube ingestion (commented — optional) | -| `RENDER_SERVICE_URL` | `http://renderer:3100` | Optional: Remotion render service | -| `MAX_CONCURRENT_JOBS` | `5` | Tuning | -| `VITE_API_URL` | `http://localhost:3002` | Tuning | -| `VITE_ENCRYPTION_KEY` | _(unset)_ | Tuning (commented — optional) | -| `ELEVENLABS_API_KEY` | _(unset)_ | Tuning (commented — optional) | -| `UPLOAD_POST_API_KEY` | _(unset)_ | Tuning (commented — optional) | -| `FAL_KEY` | _(unset)_ | Tuning (commented — optional) | -<!-- AUTO:ENV:END --> - -ElevenLabs / Upload-Post / fal.ai keys are **client-side** (encrypted in browser localStorage, sent per-request via headers). They are NOT read from `.env`. - -## Conventions - -1. **Single FFmpeg wrapper.** Every `subprocess.run(['ffmpeg', ...])` call should funnel through `backend/app/video/ffmpeg.py`. Migration of existing callers is incremental — but new code must use the wrapper. -2. **API keys via headers, not env.** Client-side keys (Gemini, ElevenLabs, Upload-Post, fal.ai) arrive on each request as `X-...-Key`. The resolver helper for these lives in `backend/app/core/api_keys.py` (planned). Do NOT call `request.headers.get('X-...')` outside that file. -3. 
**Prompts as files.** New Gemini prompts go in `backend/app/prompts/<name>.md` and are loaded by name. Editing-domain prompts may stay inline in `backend/app/editing/prompts.py`. -4. **Every module starts with a docstring.** The pre-commit hook (`scripts/update_claude_md.py`) fails the commit if any `.py` file under `backend/app/` lacks one. Use a single line — it becomes the row in the auto-managed module map. -5. **Tests first.** A characterization test suite (`tests/`) was written *before* the restructure. Anything that touches behavior should keep `pytest -m "not e2e"` 100% green. The OpenAPI snapshot in `tests/snapshots/baseline.openapi.json` pins the public API. -6. **No new global dicts in routers.** Job state goes through `backend/app/core/job_store.py` (planned). Today, `backend/app/main.py` still owns these dicts — keep them centralized there until the routers are split out. - -## Pointers - -- `ROADMAP.md` — designs for the three upcoming features (motion graphics, soundtracks, layouts) and deferred refactors (router split, FFmpeg-wrapper migration, saasshorts internal split). -- `scripts/update_claude_md.py` — what regenerates the auto-managed sections of this file. -- `scripts/install_hooks.sh` — one-liner to wire up the pre-commit hook. -- `tests/snapshots/baseline.openapi.json` — the contract that any backend change must keep green. -- `frontend/` — the React/Vite frontend (deliberately out of scope for the current restructure). - -## Tech stack - -- **Backend:** Python 3.11, FastAPI, google-genai, faster-whisper, ultralytics (YOLOv8), mediapipe, opencv-python, yt-dlp, FFmpeg, httpx. -- **Frontend:** React 18, Vite 4, Tailwind CSS 3.4. -- **External:** Google Gemini, ElevenLabs Dubbing, Upload-Post, fal.ai (Flux + Kling), Remotion. -- **Infra:** Docker + Docker Compose, AWS S3. 
diff --git a/scripts/update_claude_md.py b/scripts/update_claude_md.py index 3d0b744a..33be6d7c 100755 --- a/scripts/update_claude_md.py +++ b/scripts/update_claude_md.py @@ -5,9 +5,11 @@ What this does ============== -CLAUDE.md is split into hand-written prose and three auto-managed tables. The -auto-managed sections live between marker comments and get rewritten by this -script on every commit (via the pre-commit hook in .pre-commit-config.yaml): +The OpenShorts project content lives inside the user's global Claude config at +``~/.claude/CLAUDE.md`` under the ``## OpenShorts (project-specific)`` H2 +section. That section contains three auto-managed tables flanked by marker +comments; this script rewrites the body between those markers on every commit +(via the pre-commit hook in .pre-commit-config.yaml): <!-- AUTO:REPO-MAP:START --> ... <!-- AUTO:REPO-MAP:END --> <!-- AUTO:MODULE-MAP:START --> ... <!-- AUTO:MODULE-MAP:END --> @@ -18,7 +20,8 @@ 2. Parses every backend/app/*.py module via ast, extracting the one-line docstring + the names of public functions/classes (MODULE-MAP). 3. Reads .env.example and renders the env-vars table (ENV). -4. Locates the markers in CLAUDE.md and rewrites only the content between them. +4. Locates the markers in ~/.claude/CLAUDE.md and rewrites only the content + between them. It is idempotent: running it twice with no source changes is a no-op. @@ -50,7 +53,10 @@ REPO_ROOT = Path(__file__).resolve().parent.parent PACKAGE_ROOT = REPO_ROOT / "backend" / "app" -CLAUDE_MD = REPO_ROOT / "CLAUDE.md" +# Project content lives in the user's global CLAUDE.md under the +# "## OpenShorts (project-specific)" H2 section. OPENSHORTS_CLAUDE_MD lets users +# (e.g.
+CLAUDE_MD = Path(os.environ.get("OPENSHORTS_CLAUDE_MD", str(Path.home() / ".claude" / "CLAUDE.md"))) ENV_EXAMPLE = REPO_ROOT / ".env.example" @@ -244,8 +250,11 @@ def main() -> int: return 1 CLAUDE_MD.write_text(updated, encoding="utf-8") - print(f"✓ Rewrote {CLAUDE_MD.relative_to(REPO_ROOT)} " - f"(REPO-MAP, MODULE-MAP, ENV sections).") + try: + display_path = CLAUDE_MD.relative_to(REPO_ROOT) + except ValueError: + display_path = CLAUDE_MD + print(f"✓ Rewrote {display_path} (REPO-MAP, MODULE-MAP, ENV sections).") return 0 From 0f844337e5fa119d7d5b84eadb6c71ee0737f179 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Wed, 20 May 2026 10:37:00 -0400 Subject: [PATCH 29/43] fix(short-form): probe duration on Upload + freshen HANDOFF Upload.jsx now reads videoElement.duration via a hidden <video preload="metadata"> when files are added, so Processing.jsx can show a real ETA instead of a hashed placeholder. HANDOFF.md captures the session-end state for the next agent. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- HANDOFF.md | 980 ++++++++---------- frontend/src/pages/ShortForm/steps/Upload.jsx | 47 +- 2 files changed, 503 insertions(+), 524 deletions(-) diff --git a/HANDOFF.md b/HANDOFF.md index 4ca2a199..4d39af2e 100644 --- a/HANDOFF.md +++ b/HANDOFF.md @@ -1,6 +1,8 @@ -# OpenShorts — Full Project Handoff +# OpenShorts — Handoff -A self-contained briefing for the next agent (human or LLM) picking up this codebase. Reads top-to-bottom. +A self-contained briefing for the next agent (human or LLM). Reads top-to-bottom. + +If you read nothing else, read **§3 Critical bugs already fixed**, **§5 Outstanding work**, and **§6 Operating rules** — those keep you from re-walking past landmines. --- @@ -8,682 +10,620 @@ A self-contained briefing for the next agent (human or LLM) picking up this code OpenShorts is an **AI-powered vertical short-video generator**. 
Drop in a long video (YouTube URL or local upload) and it produces 3–15 viral 9:16 clips ready for TikTok / Reels / Shorts. -The hot path: +Hot path of the Clip Generator: + 1. Transcribe audio locally (faster-whisper, INT8). 2. Detect scene boundaries (PySceneDetect). 3. Send transcript to **Gemini 2.5 Flash** → returns 3–15 viral moments with start/end times and titles. 4. Cut each clip with FFmpeg. 5. Per-scene reframe to 9:16 — either tracking the active speaker (MediaPipe face + YOLOv8 person) or a panoramic blurred-background ("General") composite. -6. Optional layers: AI-generated FFmpeg effects, text hook PNGs, burn-in subtitles, ElevenLabs voice dubbing, S3 backup, Upload-Post distribution to socials. +6. Optional layers: AI-generated FFmpeg effects, text hook PNGs, burn-in subtitles, ElevenLabs voice dubbing, S3 backup, Upload-Post distribution. + +The frontend now wraps this in a **multi-page platform shell**: -Additional dashboards in the same app: -- **AI Agent** (Claude Code skill — runs from terminal, auto-clips a folder of long verticals). -- **UGC Gallery** + **SaaSShorts** (AI talking-head ad generator using fal.ai Flux + Kling). -- **YouTube Studio** (Gemini-generated titles, thumbnails, descriptions for long-form YouTube). -- **Settings** (API key paste + **Brand Kit**). +- **Short-form** — 4-step wizard for up to 5 source videos in one batch. Each runs the same `/api/process` pipeline in parallel. +- **Long-form** — 4-step wizard with a chapter-aware editor for re-exporting segments as shorts. The pipeline that actually generates chapters is stubbed; the wizard simulates progress and seeds placeholder chapters. +- **Clip Generator** — the original single-job flow at `/api/process` (still works; this is what the wizards are layered on top of). +- **Dashboard** — StatCards + scheduled-uploads list + recent activity, derived from localStorage history + the notifications store. 
+- **Settings** — Brand Kit, API Keys, and placeholder section pages for Subtitle style / Color presets / Export defaults / per-platform. +- **Legacy** — SaaSShorts UGC pipeline, YouTube thumbnails, UGC gallery, AI Agent terminal — hidden from the sidebar but reachable at `/legacy/*` URLs. + +See `CLAUDE.md` for the full decision table on **where new things go** (routes, FFmpeg ops, layouts, motion graphics, etc.) — auto-managed sections are regenerated by the pre-commit hook. --- -## 2. Current state (snapshot) +## 2. Current branch state | | | | --- | --- | -| **Branch** | `chore/restructure-and-docs` (19 commits ahead of `main`) | +| **Branch** | `chore/restructure-and-docs` (27 commits ahead of `main`) | +| **HEAD** | `7d073cb fix(short-form): normalize backend job status + result key` | +| **Working tree** | **NOT clean** — see §4 | | **Revert point** | `git reset --hard pre-restructure-20260519-1526` | -| **Tests** | 62/62 green (`cd backend && pytest -m "not e2e"`, ~0.6s) | +| **Tests** | 61/62 green (`cd backend && pytest -m "not e2e"`). 1 pre-existing OpenAPI snapshot drift — see §5. | | **OpenAPI baseline** | `backend/tests/snapshots/baseline.openapi.json` (35 endpoints) | -| **Docker** | All three services running | +| **Frontend build** | Green; 1616 modules, ~1289 KB JS chunk, 0 warnings | +| **Docker stack** | All three services run via `docker compose up --build` (read §6 first — there's a known volume gotcha) | | **Frontend URL** | http://localhost:3001 | | **Backend API** | http://localhost:3002 | | **Renderer** | http://localhost:3003 | -| **Uncommitted work** | Brand Kit feature — fully working, not yet committed. See section 9. 
| -### What's uncommitted right now +### Commit graph (top of branch) + ``` - M .env.example (RENDER_SERVICE_URL comment) - M CLAUDE.md (port refs + path refs) - M README.md (port refs) - M backend/app/main.py (font endpoints + words_per_line on SubtitleRequest) - M backend/app/overlays/subtitles_generate.py (max_words param threaded through generate_srt) - M backend/tests/snapshots/baseline.openapi.json (regenerated for new routes) - M docker-compose.yml (port mappings, volume mount fix) - M frontend/src/App.jsx (BrandKit import + Settings layout widened) - M frontend/src/components/HookModal.jsx (uses useBrandKit hook) - M frontend/src/components/ResultCard.jsx (sends words_per_line on /api/subtitle) - M frontend/src/components/SubtitleModal.jsx (uses useBrandKit hook) -?? assets/fonts/user/ (user-uploaded fonts target dir) -?? frontend/src/components/BrandKit.jsx (NEW — main brand kit UI block) -?? frontend/src/components/BrandPreview.jsx (NEW — canvas live preview) -?? frontend/src/components/FontPicker.jsx (NEW — scrollable in-font font picker) -?? frontend/src/components/PositionGrid.jsx (NEW — 3x3 anchor selector) -?? frontend/src/lib/brandKit.js (NEW — localStorage + hook + helpers) +7d073cb fix(short-form): normalize backend job status + result key +93f5907 docs(roadmap): add product roadmap + smoke-test follow-ups +43c2d96 fix(smoke-test): runtime bugs + Codex H1/H2/M3 remediation +95ca831 feat(ui): phase 4 — long-form 4-step wizard + Dashboard +97b7eff feat(ui): phase 3 — short-form 4-step wizard + UI primitives +337b509 feat(ui): phase 2 — Settings VS-Code layout + notifications + tooltips +667a88e feat(ui): phase 1 — shell + theme + routing skeleton +3d2b4f8 feat(brand-kit): brand kit settings + font upload + port refresh +55f0ef1 chore(restructure): split repo into backend/ + frontend/ + renderer/ + assets/ +1dd4b9a docs(roadmap): design future features + document deferred refactors ``` -The user has NOT been asked to commit yet. 
Ask before committing. +### Nothing has been pushed + +The user explicitly said this branch stays local until they decide otherwise. `mutonby/openshorts` is read-only for the active gh account — don't try to push there. If they ask to push, fork first or have them switch gh accounts. --- -## 3. Top-level architecture +## 3. Critical bugs already fixed -``` -openshorts/ ← repo root -├── backend/ 🐍 Python FastAPI (Python 3.11 in Docker, ≥ 3.9 locally) -├── frontend/ ⚛️ React + Vite + Tailwind (Node 18 in Docker) -├── renderer/ 🎬 Remotion render microservice (TypeScript) -├── assets/ 🖼️ Committed static files (fonts + screenshots) -├── scripts/ 🛠️ Dev tooling (CLAUDE.md auto-updater + pre-commit hook installer) -├── output/ (gitignored runtime) -├── uploads/ (gitignored runtime) -├── docker-compose.yml -├── README.md -├── CLAUDE.md ← auto-managed sections regenerated by pre-commit hook -├── ROADMAP.md -├── LICENSE -└── .env.example ← template; copy to .env before running -``` +These were all surfaced by the post-Phase-4 browser smoke test (the previous agent shipped Phases 1–4 without ever exercising the UI in a browser — `npm run build` doesn't catch runtime). Don't reintroduce them. + +### 🔴 BLOCKER: `run_job` invoked a missing entry point + +`backend/app/main.py:365` was running `python -u main.py` but no top-level `main.py` exists post-restructure — the CLI was moved to `backend/app/cli.py`. **Every short-form Processing job exited with code 2** (`python: can't open file '/app/main.py'`). Fixed → `python -u -m app.cli`. The CLI now actually runs. + +### 🔴 BLOCKER: Long-form simulated progress stuck at 0 % in dev + +`LongForm/steps/Processing.jsx` used a `startedRef` gate combined with a cleanup `clearInterval`. Under React 18 StrictMode (dev mode in docker), mount #1 set the ref + timer, the auto-cleanup cleared the timer, mount #2 bailed early — no timer running. Fixed by removing the gate and making the initial `setData` idempotent. 
+ +**Pattern rule (don't repeat):** if a useEffect has cleanup AND a ref-based "run once" gate, StrictMode dev will silently break it. Either no cleanup OR no gate — pick one. -This split follows **`fastapi/full-stack-fastapi-template`** (the official tiangolo template) — the canonical FastAPI+React monorepo layout. Each top-level folder is self-contained and has its own Dockerfile where relevant. Docker Compose orchestrates the three deployable services. +### 🟠 Codex H1: file upload had no MIME / signature check + +`POST /api/process` accepted any file with `.mp4` in the name. Added `_ensure_video_upload(filename, first_chunk)` at `backend/app/main.py` — checks extension (`.mp4`/`.mov`) AND the `ftyp` box at byte offset 4 before writing to disk. Returns 415 with precise reason on mismatch. + +**Verified via curl:** text-content-with-.mp4 → 415 (ftyp), real-mp4-with-.txt → 415 (extension), real .mp4 → 200. + +### 🟠 Codex H2: wizard let users march past lost File handles + +File objects don't survive `JSON.stringify`. Both wizards persist `wizard.data` to localStorage. After a reload, `data.files[0].file` is a plain `{}` instead of a real File — and the wizards happily let you advance past Upload into a step that always fails. + +Fixed by adding an optional `resetOnRehydrate(mergedData)` predicate to `useWizard`. Both `Wizard.jsx` callers pass a File-presence check; rehydrate detects degraded state and forces step=0 + clears persistence. Verified end-to-end: upload → advance → reload → wizard now sits on Upload with cleared state. + +### 🟡 Codex M3: short-form polling could race on stale responses + +`ShortForm/steps/Processing.jsx` setInterval callback was async — if a `/api/status` call took longer than the 2 s poll interval, an older response could arrive after a newer one and overwrite `complete`/`error` back to `processing`. Cleanup also didn't abort in-flight fetches. 
+ +Fixed by adding `AbortController` + `cancelled` flag, and a terminal-status guard in the setData updater (`if cur.status === 'complete' || cur.status === 'error' return prev`). Tested under the dev StrictMode double-mount. + +### 🔴 Backend/frontend contract mismatch (caught only by the real-key run) + +**This one only surfaced when a real Gemini key let the pipeline actually finish.** Dummy-key runs failed at Gemini and never exercised the success path. + +| backend sends | wizard was reading | +| --- | --- | +| `status: "completed"` | `'complete'` (so done-check never fired) | +| `status: "failed"` | `'error'` (so error rows never matched) | +| `result: {...}` | `data.results` (typo — the clips never reached `j.result`) | + +Without these mappings, a backend job that **finished cleanly and produced a clip** left the wizard sitting on "Process finished successfully." with Skip + Review both disabled. + +Fixed by adding `normalizeJobPayload(data)` next to `fetchStatus` in `ShortForm/steps/Processing.jsx` (mirrors what the legacy `frontend/src/hooks/useJobPolling.js` does for the Clip Generator). Backend vocab is `queued | processing | completed | failed` with `result` (singular); the wizard speaks `queued | processing | complete | error` with `result`. + +**Rule:** the legacy `useJobPolling.js` is the canonical reference for how to consume `/api/status` — copy the mapping when adding a new poll site. --- -## 4. Directory map (full) +## 4. Uncommitted working-tree changes -### `backend/` +When this handoff was written, the tree was dirty with **a follow-up UX pass on Short-form Processing** plus the corresponding CLAUDE.md rule. The user has not yet asked for these to be committed — confirm before committing. 
-``` -backend/ -├── Dockerfile # Python 3.11-slim + ffmpeg + libgl1 + node + yt-dlp -├── pyproject.toml # package "app" found by setuptools -├── requirements.txt # runtime deps (FastAPI, fastwhisper, ultralytics…) -├── requirements-dev.txt # pytest, httpx, respx, vcrpy -└── app/ # the Python package (importable as `app`) - ├── __init__.py - ├── main.py # FastAPI app + 35 routes (was: root app.py, 2256 lines) - ├── cli.py # CLI: python -m app.cli -i input.mp4 -o output/ - │ - ├── core/ # Cross-cutting infra (job queue, store, api-key resolver) — scaffolded - ├── routes/ # Per-domain routers — scaffolded (routes still in main.py) - │ - ├── video/ # Core video processing - │ ├── pipeline.py # process_video_to_vertical (orchestrator + per-frame loop) - │ ├── tracking.py # SmoothedCameraman, SpeakerTracker (heart of the pipeline) - │ ├── scene_analysis.py # detect_scenes, analyze_scenes_strategy (TRACK vs GENERAL) - │ ├── reframing.py # create_general_frame (blurred-background composite) - │ └── ffmpeg.py # Single FFmpeg wrapper (NEW — migration in-flight; existing - │ # subprocess.run(ffmpeg) calls NOT yet migrated) - │ - ├── ml/ # AI inference - │ ├── detection.py # MediaPipe BlazeFace + YOLOv8n - │ ├── transcription.py # faster-whisper INT8 (CPU) - │ └── viral_extraction.py # Gemini 2.5 Flash + cost analysis - │ - ├── ingest/ - │ └── youtube.py # yt-dlp with bot-detection workarounds (tv_embed, android, mweb) - │ - ├── editing/ - │ ├── ai_filters.py # VideoEditor — Gemini-driven FFmpeg filter generation - │ └── prompts.py # build_ffmpeg_filter_prompt, build_effects_config_prompt - │ - ├── overlays/ - │ ├── hooks.py # Hook PNG generation (PIL) + FFmpeg burn-in - │ ├── subtitles_generate.py # SRT generation (words → grouped lines) - │ └── subtitles_render.py # subtitles filter (FFmpeg) + ASS color conversion - │ - ├── thumbnails/ - │ ├── titles.py # Gemini viral title generation + refinement loop - │ ├── images.py # Gemini multimodal image preview (thumbnail gen) - │ 
└── descriptions.py # YouTube description + chapter markers - │ - ├── saas/ - │ └── pipeline.py # SaaSShorts UGC generator (research → script → media → composite) - │ # 1474 lines — internal split deferred (see ROADMAP) - │ - ├── integrations/ - │ ├── s3.py # AWS S3 (clip backup + public gallery + presigned URLs) - │ └── elevenlabs.py # Dubbing API (30+ languages) + SUPPORTED_LANGUAGES dict - │ - ├── prompts/ # Externalized Gemini prompts (.md files) - ├── models/ # Pydantic schemas (scaffolded — current schemas inline in main.py) - ├── utils/ - │ └── filters.py # Shared FFmpeg filter helpers (sanitize, chain split, zoompan enforce) - │ - ├── audio/ # ROADMAP feature A (soundtracks + ducking) — scaffolded - ├── layouts/ # ROADMAP feature B (template layouts) — scaffolded - └── motion_graphics/ # ROADMAP feature C (animated overlays) — scaffolded - └── library/ # individual effect modules - -└── tests/ # Characterization suite (62 tests) - ├── conftest.py # sys.modules stubs (cv2, mediapipe, ultralytics, torch, etc.) - ├── unit/ # ~5 modules, pure-Python, fast - ├── api/test_openapi_contract.py # OpenAPI snapshot — pins all 35 routes - ├── e2e/test_pipeline_smoke.py # Skipped by default; needs real ffmpeg + fixtures - ├── snapshots/baseline.openapi.json # The contract — 35-endpoint surface - └── fixtures/ # Test videos (small, deterministic) -``` +| File | What it does | +| --- | --- | +| `CLAUDE.md` | Adds Convention #7: short-form and long-form code MUST stay isolated. No cross-imports between `pages/ShortForm/` and `pages/LongForm/`. Shared things go in `frontend/src/hooks/`, `components/ui/`, `state/`, or `lib/`. | +| `frontend/src/pages/ShortForm/steps/Upload.jsx` | Probes video duration via a hidden `<video preload="metadata">` element when files are added. Stores `durationSec` on each file entry. Surfaces in the file list as `5.3 MB · 42s`. | +| `frontend/src/pages/ShortForm/steps/Categorize.jsx` | Gates the "Start processing →" button on `keys.gemini`. 
Shows an amber banner with an "Open Settings →" link when no key is set. Users can no longer reach Processing in an unrunnable state. | +| `frontend/src/pages/ShortForm/steps/Processing.jsx` | Rewrite of the wait UX (full file). Three behavior changes: | +| | 1. **Reactive to `keys.gemini`** — `useEffect([keys.gemini])` instead of `useEffect([])` + `startedRef`. If you land here without a key and set one in another tab, the wizard kicks off jobs the moment the key arrives. | +| | 2. **ETA on the right instead of job ID.** Uses `file.durationSec * 1.2`, floored at 25 s, fallback 60 s. Counts down once per second. Shows "taking longer than expected…" if we overrun by 10 s. | +| | 3. **Specific wait states.** Replaced flat "Queued…" with `awaiting_key` / `uploading` / `queued` / `processing` (real backend log) / `complete` / `error`, each with its own caption and icon. The amber `KeyRound` icon appears when no key is set. | -### `frontend/` +The Short-form Processing rewrite was smoke-tested in the browser: -``` -frontend/ -├── Dockerfile # node:18-alpine + Vite dev server -├── package.json -├── vite.config.js # Proxies /api, /videos, /thumbnails, /gallery, /video → backend:8000 -│ # Proxies /render → renderer:3100 (internal Docker network) -├── tailwind.config.js -├── index.html -└── src/ - ├── App.jsx # Tab switcher + state (jobs, api keys, etc.) 
- ├── Landing.jsx # Marketing landing page - ├── Legal.jsx # Terms & Privacy - ├── main.jsx, index.css, App.css, config.js - ├── lib/ - │ ├── brandKit.js # NEW — localStorage helpers + useBrandKit hook + wrapByWords - │ └── renderInBrowser.js - ├── components/ - │ ├── KeyInput.jsx # API-key paste for Gemini - │ ├── BrandKit.jsx # NEW — main brand kit UI - │ ├── BrandPreview.jsx # NEW — canvas live preview with chunk cycling - │ ├── FontPicker.jsx # NEW — scrollable font list (each name in its own font) - │ ├── PositionGrid.jsx # NEW — 3x3 anchor selector - │ ├── MediaInput.jsx # URL / file upload box - │ ├── ResultCard.jsx # Generated clip viewer - │ ├── HookModal.jsx # Hook overlay editor (pre-fills from brand kit) - │ ├── SubtitleModal.jsx # Subtitle burn-in editor (pre-fills from brand kit) - │ ├── TranslateModal.jsx # ElevenLabs dubbing - │ ├── ThumbnailStudio.jsx # YouTube Studio panel - │ ├── SaaShortsTab.jsx # SaaS UGC pipeline UI - │ ├── UGCGallery.jsx # UGC video browser - │ ├── Gallery.jsx, GalleryCard.jsx - │ ├── RemotionPreview.jsx # Remotion in-browser preview wrapper - │ ├── ProcessingAnimation.jsx - │ └── ScheduleWeekModal.jsx - └── remotion/ # In-browser Remotion compositions -``` +- Upload demo MP4 → list shows `5.3 MB · 42s` (duration probe works). +- Advance to Categorize without a key → amber banner + button disabled. +- Set the key from Settings → banner clears live, button enables (no reload). +- Continue to Processing → row shows "🎙️ Transcribing video with Faster-Whisper (CPU Optimized)…" with "~45s left" on the right. -### `renderer/` +The third end-to-end run (real Gemini key, demo MP4 → finished clip in Review) was kicked off at the end of the session. 
The next agent should: -``` -renderer/ -├── service/ # Standalone TypeScript microservice -│ ├── Dockerfile # node:18 + Chromium + ffmpeg -│ ├── package.json -│ ├── tsconfig.json -│ └── src/ -│ ├── server.ts # HTTP server on 3100 -│ ├── bundle.ts # Bundles Remotion compositions -│ └── render-worker.ts # Headless Chromium renderer -└── compositions/ # Remotion compositions (TSX) - ├── package.json - └── src/ # All composition files -``` +1. Verify the run reached Review cleanly (`docker logs openshorts-backend | grep "Total execution time"` should show ~40 s). +2. Confirm with the user, then commit the working tree. Suggested message: + + ``` + fix(short-form): reactive key check + ETA + specific wait states + + - Processing.jsx: drop startedRef gate, depend on keys.gemini; + show ETA from probed duration instead of job_id hash; split + the queued state into awaiting_key / uploading / queued / + processing (with real backend log). + - Upload.jsx: probe video duration via HTMLVideoElement on add. + - Categorize.jsx: amber gate banner + disabled "Start processing" + when no Gemini key. + - CLAUDE.md: add isolation rule (Convention #7) — short-form and + long-form must not cross-import; shared logic lives under + hooks/ components/ui/ state/ lib/. + ``` -### `assets/`, `scripts/`, runtime dirs +--- -``` -assets/ -├── fonts/ -│ ├── NotoSerif-Bold.ttf # Used by hook overlays -│ └── user/ # User-uploaded fonts (brand kit, persistent) -└── screenshots/ # For README.md only +## 5. Outstanding work -scripts/ -├── update_claude_md.py # AST-based; regenerates 3 auto-managed CLAUDE.md sections -└── install_hooks.sh # One-time pre-commit hook installer +Ordered by what'll bite next. 
-output/, uploads/ # Runtime (gitignored) -``` +### 🟡 Immediate (next session) + +- **Same Categorize gate + reactive Processing pattern for long-form.** Long-form's Processing step is currently simulated (no backend), so the missing-key path doesn't bite — but when chapter detection ships (TODO #6 below), the same gate is required. Mirror the short-form pattern; the isolation rule in CLAUDE.md (Convention #7) means **copy the logic, don't import it.** +- **Codex re-run before merge.** Run `/codex:adversarial-review` once you've confirmed the working-tree commit, both to verify the H1/H2/M3 fixes land cleanly and to surface anything the first pass deferred. Reference task: `task-mpdeyzjz-vpdetv` (completed 2026-05-20 02:01 UTC). +- **Pre-existing OpenAPI snapshot drift.** `backend/tests/api/test_openapi_contract.py` fails 1/62 — Pydantic emits `contentMediaType: application/octet-stream` for file fields where the baseline has `format: binary`. No route changes; pure schema-serialization drift. Regenerate per §12, or pin Pydantic. + +### 🟢 Backend TODOs the UI is already wired for + +Each is referenced inline in the UI as `// TODO(backend): plan TODO #N` at the stub site: + +1. **`POST /api/process/batch`** — accept up to 5 files in one call. Replaces the per-file loop in short-form Processing. +2. **`POST /api/categorize`** — classify a clip as Educational / Yap / Live / Viral (today defaults are pre-selected in Categorize.jsx). +3. **Per-category layout branches** in `backend/app/video/pipeline.py`. +4. **Silence removal** — `silencedetect` filter integration in `backend/app/video/ffmpeg.py`. +5. **LUT color grade** — `lut3d` filter integration. +6. **Chapter detection for long-form** — wire PySceneDetect (already imported) behind a new route. +7. **`POST /api/long-form/export-segment`** — re-render a chapter range as a short. +8. **Intro / outro overlay insertion.** +9. 
**🔴 Notification API gap (HIGH-VALUE).** Make `/api/social/post` enqueue into a `publish_jobs` queue (mirror the thumbnail flow at `backend/app/main.py:1565-1620`) and add `GET /api/social/publish/status/{publish_id}`. Until this lands the notification bell can't advance past `submitted`. +10. **`GET /api/clips/recent?limit=20`** — Dashboard live stat source. +11. **WebSocket / SSE** for real-time updates (optional; 2 s polling is acceptable v1). + +### 🟢 Backend security baseline for `POST /api/process` (STATE-MUTATING) + +The fix-smoke commit landed C3 (input validation) and re-confirmed C8 (concurrency lock via the job-queue semaphore) and C9 (attestation log). A future `/gsd-secure-phase` pass should land: + +- **C2 — Rate limit.** Per-IP and per-key caps. `MAX_CONCURRENT_JOBS` is process-wide, not per-caller. +- **C4 — Timeout / breaker.** `run_job` spawns a Python subprocess with no timeout. A 15-min hard cap + breaker on repeated subprocess crashes (e.g. yt-dlp 403s) would prevent zombie jobs. +- **C7 — Idempotency.** Accept `Idempotency-Key` header; dedup window keyed on `(api_key_fingerprint, file_sha256 OR url)` for ~5 min. +- **C10 — Abuse / cost cap.** BYOK means cost lands on the user, so the host-side concern is volume — burst-rate kill switch + per-IP/day quota. + +Full tier→control matrix at `~/.claude/skills/securing-http-and-llm-endpoints/`. Invoke that skill before editing any route handler. + +### 🟢 Frontend polish backlog + +- **Dashboard.jsx** — "CLIPS PROCESSED: 1" StatCard shows the count but the sub-caption still reads "No batches yet". Caption is a separate field; threshold-derived sub-copy is the fix. +- **`LongForm/steps/Processing.jsx`** — `useRef` is still imported (used by `savedRef`); harmless, but worth a tidy if/when the `savedRef` pattern itself gets replaced. +- **`assets/fonts/user/`** — already persistent across restarts, but there's no UI to *delete* an uploaded font. 
+ +### 🟢 Infra + +- **Docker Compose anonymous volume gotcha.** `/app/node_modules` in `docker-compose.yml` masks freshly-installed npm deps after a `package.json` change. `react-router-dom` was missing in the container on first smoke-test run despite being in `package.json`. Permanent fix: either drop the anonymous volume from the `frontend` service (slower first builds, but no surprises), or document `docker compose down -v && docker compose up --build` in the README. **This will bite anyone who fresh-clones the repo.** +- **OpenAPI snapshot regen workflow** — see §12. + +### 🟢 Larger features (technical roadmap) + +See `ROADMAP.md` for full designs. Shipping order: + +1. **Feature C — Motion Graphics Library.** Lowest blast radius; reuses the hook-overlay pattern. +2. **Feature A — Background Soundtracks + SFX with Ducking.** Needs the FFmpeg-wrapper migration finished first. +3. **Feature B — Layout Templates.** Last because it rewrites the per-frame loop. + +### 🟢 Deferred refactors + +- Full router split of `backend/app/main.py` (2 256 lines, 35 routes → 11 routers + `create_app()` factory). The OpenAPI snapshot is the gate. +- Migrate every `subprocess.run(['ffmpeg', ...])` to `backend/app/video/ffmpeg.py`. One caller per commit. +- Split `backend/app/saas/pipeline.py` (1 474 lines) into `research / scripting / media / compositing / pipeline`. +- Extract `backend/app/core/{job_store, api_keys}.py` — land alongside the router split, not before. --- -## 5. 
Tech stack - -### Backend (Python 3.11) -- **FastAPI** + Uvicorn — async API -- **faster-whisper** — local speech-to-text (CPU INT8) -- **MediaPipe BlazeFace** + **YOLOv8n** (ultralytics) — face/person detection -- **PySceneDetect** — scene boundary detection -- **OpenCV** — frame I/O -- **yt-dlp** — YouTube ingest -- **FFmpeg** (system binary) — encode/overlay/burn/mux -- **httpx**, **boto3**, **google-genai** — API clients -- **Pillow** — hook image rendering - -### Frontend (Node 18) -- **React 18**, **Vite 4**, **Tailwind CSS 3.4** -- **lucide-react** for icons -- **Remotion** compositions (in-browser preview + server-side render) - -### External services (paid APIs; user supplies keys) -| Service | Used for | Where | -| --- | --- | --- | -| **Google Gemini 2.5 Flash** | Viral moment extraction, video effects, thumbnail titles, descriptions | `app.ml.viral_extraction`, `app.editing`, `app.thumbnails` | -| **ElevenLabs Dubbing** | Voice translation 30+ languages | `app.integrations.elevenlabs` | -| **fal.ai** (Flux + Kling/Hailuo) | SaaSShorts UGC actor generation | `app.saas.pipeline` | -| **Upload-Post** | Social media distribution | `app.main` (`/api/social/*`) | -| **AWS S3** | Clip backup + public gallery | `app.integrations.s3` | +## 6. Operating rules -### Cost story -- **All transcription / face tracking / scene detection / video processing runs LOCALLY** — no API cost. -- **Gemini is the only required paid API** (has free tier; personal use likely $0). -- Everything else (ElevenLabs / fal.ai / Upload-Post / S3) is opt-in. +These come from the user's instructions + lessons from this and prior sessions. Respect them or you'll either break something or lose user trust. + +1. **Don't merge to `main` without explicit approval.** The branch has 27 commits + uncommitted work; the user reviews before merge. +2. **Don't push.** The remote `mutonby/openshorts` is read-only for the active gh account. 
If the user asks for a PR, fork or have them switch accounts — see §2. +3. **Compaction check-ins every 2 phases.** When executing a multi-phase plan, pause after every 2 phases and ask "Finished phases X and Y — keep going, or compact context first?" Don't wait to be asked. +4. **Codex adversarial review is required** when (a) modifying security-sensitive code (auth, crypto, payments, secrets, tokens, user input, SQL/ORM, file I/O, network/HTTP) or (b) marking a phase or feature complete. Use `/codex:adversarial-review --background "..."`. +5. **HTTP/LLM endpoint security skill is required** when inspecting, planning, modifying, or auditing code that contains an HTTP endpoint or LLM call. Invoke `securing-http-and-llm-endpoints` before touching `backend/app/main.py` route handlers. Frontend consumers of existing endpoints don't trigger this. +6. **Verification before completion is non-negotiable.** Don't say "fixed" or "done" without running the verification command in the same response. `npm run build` is not a runtime test. For UI changes, exercise in the browser via chrome-devtools MCP. The previous agent shipped Phases 1–4 claiming success without ever opening a browser — three live BLOCKER bugs surfaced the moment a real user touched it. +7. **Don't touch FFmpeg `subprocess.run` calls** unless you're doing the full wrapper migration — dozens of callers, all currently working. +8. **The OpenAPI snapshot test will fail** the moment you add or remove any route. Regen procedure in §12. +9. **Short-form and long-form are isolated.** No cross-imports between `frontend/src/pages/ShortForm/` and `frontend/src/pages/LongForm/`. If logic is genuinely shared (wizard state machine, UI primitives, stores), it lives in `frontend/src/hooks/`, `components/ui/`, `state/`, or `lib/`. Codified as CLAUDE.md Convention #7. +10. **Backend status vocab ≠ wizard vocab.** Backend says `queued | processing | completed | failed` with `result` (singular). 
Wizard expects `queued | processing | complete | error` with `result`. Always normalize at the read site — see `Processing.jsx:normalizeJobPayload`. +11. **File objects don't survive `JSON.stringify`.** Wizards persist `wizard.data` to localStorage. If a step depends on a `File`, use the `resetOnRehydrate` predicate in `useWizard` to force the wizard back to Upload when File handles are degraded. +12. **The Gemini key has two homes.** Server-side fallback is `.env GEMINI_API_KEY` (gitignored). Client-side override is `localStorage.gemini_key` (read by `keysStore.js`, sent as `X-Gemini-Key` header). The short-form wizard requires the client-side header — set both for a smooth UX. --- -## 6. APIs +## 7. Architecture quick-reference -### Server-side env vars (`.env`) -Only one is required: `GEMINI_API_KEY`. See `.env.example` for the full template. +For full detail read `CLAUDE.md`. This is the version you can scan in 30 s. -```bash -# Required -GEMINI_API_KEY= +### Top-level layout + +``` +openshorts/ +├── backend/ 🐍 Python FastAPI (Python 3.11 in Docker) +├── frontend/ ⚛️ React + Vite + Tailwind + react-router-dom v6 +├── renderer/ 🎬 Remotion render microservice (TypeScript) +├── assets/ 🖼️ Committed static files (fonts + screenshots) +├── scripts/ 🛠️ Dev tooling (CLAUDE.md auto-updater + pre-commit installer) +├── output/ (gitignored runtime) +├── uploads/ (gitignored runtime) +├── docker-compose.yml +├── CLAUDE.md ← auto-managed sections regenerated by pre-commit hook +├── ROADMAP.md ← product roadmap + technical roadmap + smoke-test follow-ups +├── HANDOFF.md ← you are here +└── .env.example ← template; copy to .env before running +``` + +### Backend (`backend/app/`) + +Layered Python package. Each subfolder has a one-line purpose in its `__init__.py`. + +| Folder | Rule | +| --- | --- | +| `core/` | Cross-cutting infra: job queue, job store, API-key resolver, logging. 
**Scaffolded; today these globals still live in `main.py`.** | +| `routes/` | One module per API domain. **Scaffolded; today everything is in `main.py`.** | +| `video/` | All video work. **FFmpeg only via `video/ffmpeg.py`.** (Migration in progress; many callers still use raw subprocess.) | +| `ml/` | AI inference: face/person detection, transcription, viral extraction. | +| `audio/` | Future feature A — scaffolded. | +| `layouts/` | Future feature B — scaffolded. | +| `motion_graphics/` | Future feature C — scaffolded. | +| `editing/` | AI-generated FFmpeg filter pipeline. | +| `overlays/` | Hook cards + subtitle generation / burn-in. | +| `ingest/` | YouTube downloads + local upload handling. | +| `saas/` | SaaSShorts UGC pipeline (1474 lines; internal split deferred). | +| `integrations/` | External-service clients (S3, ElevenLabs). | +| `thumbnails/` | YouTube thumbnail workflow. | +| `prompts/` | Externalized Gemini prompt templates (`.md`). | +| `models/` | Pydantic schemas. **Scaffolded; today they're inline in `main.py`.** | +| `utils/` | Shared helpers: filter sanitization, path utilities. 
| + +### Frontend (`frontend/src/`) + +``` +src/ +├── App.jsx # 47 lines: <Routes> + nested Settings routes +├── main.jsx # <HashRouter> + Landing/Legal/App boot +├── Landing.jsx, Legal.jsx +├── layouts/ +│ ├── AppShell.jsx # Sidebar + Header + <Outlet/> + useJobPolling +│ ├── Sidebar.jsx # 210 px, 5 items +│ └── Header.jsx # 50 px, title + <NotificationBell /> +├── pages/ +│ ├── Dashboard.jsx +│ ├── ClipGenerator.jsx # original /api/process flow +│ ├── ShortForm/ # 4-step wizard — see §3 for the gotchas +│ │ ├── index.jsx +│ │ ├── Wizard.jsx +│ │ ├── History.jsx +│ │ └── steps/{Upload,Categorize,Processing,Review}.jsx +│ ├── LongForm/ # 4-step wizard, processing is simulated +│ │ ├── index.jsx +│ │ ├── Wizard.jsx +│ │ ├── History.jsx +│ │ └── steps/{Upload,Settings,Processing,Editor}.jsx +│ ├── Settings/ +│ │ ├── index.jsx # VS-Code 180 px nav + <Outlet/> +│ │ └── sections/* # BrandKit, ApiKeys, placeholders +│ ├── Legacy/ # /legacy/* — SaaSShorts, Thumbnails, UGC, AI Agent +│ └── PageStub.jsx +├── state/ # Custom event-based stores (no Context, no library) +│ ├── keysStore.js # gemini/uploadPost/elevenLabs/fal + uploadUserId +│ ├── jobStore.js # active Clip Generator job +│ └── notificationsStore.js # bell feed (max 50) +├── hooks/ +│ ├── useJobPolling.js # canonical reference for /api/status mapping +│ └── useWizard.js # 4-step state machine with resetOnRehydrate +├── lib/ +│ ├── brandKit.js # localStorage + useBrandKit hook +│ ├── crypto.js # XOR + Base64 obfuscation for client keys +│ └── renderInBrowser.js +└── components/ + ├── ui/ # Tooltip, NotificationBell, PhoneFrame, SnakeGame, ... + └── *.jsx # KeyInput, BrandKit, ResultCard, modals, ... +``` + +### API surface + +35 endpoints, locked in `backend/tests/snapshots/baseline.openapi.json`. Key routes: -# Optional +| Method | Route | Purpose | +| --- | --- | --- | +| POST | `/api/process` | Submit a video (URL or upload). MP4/MOV signature + size validated. 
| +| GET | `/api/status/{job_id}` | Poll status + logs. Vocab: `queued \| processing \| completed \| failed`. | +| POST | `/api/edit` | Apply Gemini-generated FFmpeg filters. | +| POST | `/api/effects/generate` | Get a structured EffectsConfig for Remotion. | +| POST | `/api/render/{render_id}` | Render via the Remotion microservice. | +| POST | `/api/subtitle` | Generate + burn subtitles (accepts `words_per_line` from brand kit). | +| POST | `/api/hook` | Burn a text-hook PNG. | +| POST | `/api/translate` | ElevenLabs dubbing. | +| POST | `/api/social/post` | Distribute via Upload-Post (sync today — see TODO #9). | +| POST | `/api/thumbnail/*` | YouTube thumbnail workflow. | +| POST | `/api/saasshorts/*` | SaaS UGC pipeline (legacy). | +| GET | `/api/fonts`, POST `/api/fonts/upload` | Font catalog + upload. | + +### Environment + +Server-side env (`.env`, gitignored): + +```bash +GEMINI_API_KEY= # required; the user has theirs set AWS_ACCESS_KEY_ID= AWS_SECRET_ACCESS_KEY= AWS_REGION=eu-west-3 AWS_S3_BUCKET= AWS_S3_PUBLIC_BUCKET= -YOUTUBE_COOKIES= DISABLE_YOUTUBE_URL=false -RENDER_SERVICE_URL=http://renderer:3100 # Docker INTERNAL URL — do NOT change to localhost:3003 +RENDER_SERVICE_URL=http://renderer:3100 # ← Docker-INTERNAL URL; do NOT change MAX_CONCURRENT_JOBS=5 VITE_API_URL=http://localhost:3002 ``` -### Client-side keys (NOT in .env) -- `ELEVENLABS_API_KEY`, `UPLOAD_POST_API_KEY`, `FAL_KEY` — these are pasted into the **dashboard UI**, encrypted in browser `localStorage`, and sent as HTTP headers per request (`X-ElevenLabs-Key`, `X-Upload-Post-Key`, `X-Fal-Key`). The backend **never stores them**. 
- -### Internal HTTP surface (35 endpoints; full list in `backend/tests/snapshots/baseline.openapi.json`) -Key routes: -- `POST /api/process` — submit a video for processing -- `GET /api/status/{job_id}` — poll status -- `POST /api/edit` — apply AI-generated FFmpeg filters -- `POST /api/subtitle` — generate + burn subtitles (accepts `words_per_line` from brand kit) -- `POST /api/hook` — burn text hook PNG -- `POST /api/translate` — ElevenLabs dubbing -- `POST /api/effects/generate`, `POST /api/render/{render_id}` — Remotion render pipeline -- `POST /api/thumbnail/*` — YouTube thumbnail workflow -- `POST /api/saasshorts/*` — SaaS UGC pipeline -- `POST /api/social/post` — Upload-Post distribution -- **NEW** `GET /api/fonts` — list system + bundled + user-uploaded fonts -- **NEW** `POST /api/fonts/upload` — multipart `.ttf/.otf/.woff/.woff2` upload (10 MB cap) -- **NEW** `GET /api/fonts/file/{name}` — serve bundled font -- **NEW** `GET /api/fonts/file/user/{name}` — serve user-uploaded font - ---- - -## 7. Processing pipeline (11 stages) - -1. **Ingest** — `app/ingest/youtube.py:download_youtube_video()` or local upload -2. **Transcribe** — `app/ml/transcription.py:transcribe_video()` (faster-whisper, word timestamps) -3. **Scene-detect** — `app/video/scene_analysis.py:detect_scenes()` -4. **Viral extraction** — `app/ml/viral_extraction.py:get_viral_clips()` (Gemini picks 3–15 clips, 15–60 s each) -5. **Cut clips** — FFmpeg `-ss`/`-to` -6. **Strategy** — `app/video/scene_analysis.py:analyze_scenes_strategy()` (TRACK vs GENERAL per scene) -7. **Reframe** — `app/video/pipeline.py:process_video_to_vertical()` (per-frame loop with `SmoothedCameraman`) -8. **Effects** (optional) — `app/editing/ai_filters.py:VideoEditor` -9. **Hooks + subtitles** (optional) — `app/overlays/*` -10. **Translate** (optional) — `app/integrations/elevenlabs.py:translate_video()` -11. 
**Backup + distribute** — S3 + Upload-Post +Client-side keys (`ELEVENLABS_API_KEY`, `UPLOAD_POST_API_KEY`, `FAL_KEY`, plus the Gemini override) are set in Settings → API Keys, XOR+Base64-obfuscated in localStorage, sent as `X-…-Key` headers per-request. ---- +### Frontend routes (HashRouter — paths after `#/`) -## 8. History of work done in this session - -Single chronological narrative — everything that landed since the user started the session. - -### Phase 0 — Test safety net (commit `2d7eff5`) -Built a characterization test suite BEFORE touching code: -- `tests/conftest.py` with `sys.modules` stubs for heavy ML deps (cv2, mediapipe, ultralytics, torch, yt_dlp, faster_whisper, google.genai, boto3) so unit tests run in milliseconds without needing GPUs or network. -- `tests/unit/` — `test_tracking.py`, `test_filter_sanitization.py`, `test_srt_generation.py`, `test_hook_image.py`, `test_translate_languages.py`. -- `tests/api/test_openapi_contract.py` with the 32-endpoint OpenAPI snapshot (`baseline.openapi.json`). -- `tests/e2e/test_pipeline_smoke.py` — slow canary skipped by default. -- Tagged the pre-restructure state as `pre-restructure-20260519-1526`. Branched to `chore/restructure-and-docs`. 
- -### Phase 1 — First restructure (commits `d7c5a58` … `84310c7`, 11 commits) -Created an `openshorts/` Python package and moved every root-level monolith into it incrementally: -- `s3_uploader.py` → `openshorts/integrations/s3.py` -- `translate.py` → `openshorts/integrations/elevenlabs.py` -- `hooks.py` → `openshorts/overlays/hooks.py` -- `subtitles.py` → split into `openshorts/overlays/subtitles_{generate,render}.py` -- `editor.py` → `openshorts/editing/ai_filters.py` + `openshorts/utils/filters.py` (shared helpers) + `openshorts/editing/prompts.py` -- `thumbnail.py` → 3 files under `openshorts/thumbnails/` -- `main.py` → split into `video/{pipeline,tracking,scene_analysis,reframing}.py` + `ml/{detection,transcription,viral_extraction}.py` + `ingest/youtube.py` -- `saasshorts.py` → `openshorts/saas/pipeline.py` (single-file move; internal split deferred) -- `openshorts/app.py` — thin re-export of root `app.py`'s FastAPI instance -- Each old root file became a **shim** that re-exported the new paths for backwards compat. -- Added `openshorts/video/ffmpeg.py` wrapper scaffold (migration of existing `subprocess.run(['ffmpeg', ...])` calls deferred). -- Updated `Dockerfile` `CMD` to `uvicorn openshorts.app:app`. - -### Phase 3 — `.env.example` (commit `6496d69`) -Added every env var the code actually reads. Clarified which keys are server-side vs client-side (headers). - -### Phase 4 — Auto-updater tooling (commit `a32e8e5`) -- `scripts/update_claude_md.py` — AST-based parser that regenerates 3 sections of CLAUDE.md between markers (`<!-- AUTO:REPO-MAP:START/END -->`, `<!-- AUTO:MODULE-MAP:START/END -->`, `<!-- AUTO:ENV:START/END -->`). Exits non-zero if any `.py` under the package lacks a module docstring (enforces the "every module has a one-liner" rule). -- `scripts/install_hooks.sh` — wires up the pre-commit hook. -- `.pre-commit-config.yaml` — runs the updater on every commit. 
- -### Phase 2 — CLAUDE.md rewrite (commits `726bfd3`, `9e68944`) -- Full rewrite with structured sections: Project, Quick start, Where things go (decision table), Repo layout (auto-managed), Backend package, Module map (auto-managed), Processing pipeline, API surface, Environment (auto-managed), Conventions, Pointers. -- Sub-`CLAUDE.md` stubs at directory boundaries: `video/`, `layouts/`, `motion_graphics/`, `audio/`, `prompts/`. - -### Phase 5 — ROADMAP.md (commit `1dd4b9a`) -Designs for three future features in shipping order: -1. **Motion graphics** (animated overlays + multi-effect compositor) — ships first because its compositor is the prerequisite for ducking audio. -2. **Audio soundtracks + ducking** — uses Whisper word timings or FFmpeg `silencedetect` to duck music during speech. -3. **Layout templates** — abstract `Layout` base class; `VerticalPanoramaLayout` wraps current behavior; `EducationalLayout` uses two cameramen (top: source crop, bottom: presenter headshot). - -Also documents deferred refactors: full router split, FFmpeg-wrapper migration, saasshorts internal split, `core/job_store` + `core/api_keys` extraction. - -### Phase 6 — Second restructure (commit `55f0ef1`, the big one) -User pushed back on the leftover root shims and the unclear backend/frontend split. Did the proper monorepo split: -- `openshorts/` (Python package) → `backend/app/` -- Root `app.py` (2256 lines) → `backend/app/main.py` (shim imports rewritten to `app.integrations.s3`, `app.editing.ai_filters`, etc.) 
-- Root `main.py` CLI → `backend/app/cli.py` -- 9 root `.py` shims + 3 `verify_*.py` scripts → **deleted** -- `tests/` → `backend/tests/` -- `pyproject.toml`, `Dockerfile`, `requirements*.txt` → `backend/` -- `dashboard/` → `frontend/` -- `render-service/` + `remotion/` → `renderer/service/` + `renderer/compositions/` -- `fonts/` + `screenshots/` → `assets/fonts/` + `assets/screenshots/` -- `hooks.py` font path now auto-resolves by walking up the directory tree to find `assets/fonts/` -- `docker-compose.yml` updated for all new paths (entrypoint `app.main:app`) -- `scripts/update_claude_md.py` updated for new layout -- CLAUDE.md fully rewritten with the new layout - -### Phase 7 — Docker + port mapping -- Switched host ports from `5175/8000/3100` → `3001/3002/3003` (consecutive, easier to remember). -- **Container internal ports unchanged** (still 5173/8000/3100). Only host mappings changed. -- Vite proxy config (`vite.config.js`) untouched — it uses Docker internal service names (`http://backend:8000`, `http://renderer:3100`), which is correct. -- Updated `.env`, `.env.example`, README, CLAUDE.md port references. - -### Phase 8 — Brand Kit (UNCOMMITTED — section 9) +| Path | Page | +| --- | --- | +| `/` | → `/dashboard` | +| `/dashboard` | StatCards + scheduled + recent activity | +| `/short-form`, `/short-form/history` | wizard + history | +| `/long-form`, `/long-form/history` | wizard + history | +| `/clip-generator` | original `/api/process` flow | +| `/settings/general/brand-kit` | brand kit editor | +| `/settings/general/{subtitle-style,color-presets,export-defaults}` | placeholders | +| `/settings/platforms/:platform` | placeholders (YouTube/TikTok/Instagram/Snapchat/Facebook) | +| `/settings/system/api-keys` | API key paste UI | +| `/settings/system/history` | placeholder | +| `/legacy/{saasshorts,thumbnails,ugc,ai-agent}` | original tabs | --- -## 9. 
Brand Kit feature (latest, UNCOMMITTED) - -The user wanted a Hormozi-style "brand kit" so all subtitle/hook/effect text shares colors, fonts, and styling. Built across three iterations. +## 8. Brand Kit (already shipped) -### Architecture - -``` -frontend/src/lib/brandKit.js ← localStorage CRUD + useBrandKit hook + helpers -frontend/src/components/BrandKit.jsx ← the main UI block (rendered in Settings tab) -frontend/src/components/BrandPreview.jsx ← canvas live preview with chunk cycling -frontend/src/components/FontPicker.jsx ← scrollable font list (each name in its own font) -frontend/src/components/PositionGrid.jsx ← 3x3 anchor selector -``` - -### Storage shape (in browser `localStorage` under key `openshorts.brandKit.v2`) +User-facing brand kit lives at `/settings/general/brand-kit`. Storage shape: ```js +// localStorage key: openshorts.brandKit.v2 { - colors: [ - { name: 'Primary', hex: '#FFFFFF' }, - { name: 'Accent', hex: '#FFD60A' }, - { name: 'Stroke', hex: '#000000' }, - // ...user-added colors - ], - font: { family: 'Inter', source: 'system' | 'bundled' | 'user', url: null | '/api/fonts/file/...' }, - previewText: 'Stop scrolling and watch this insane clip…', + colors: [{name, hex}, ...], // 3 defaults + user-added + font: {family, source, url}, // 'system' | 'bundled' | 'user' + previewText: '...', styles: { - '9:16': { size, strokeWidth, textColor, strokeColor, position, wordsPerLine }, - '16:9': { size, strokeWidth, textColor, strokeColor, position, wordsPerLine }, + '9:16': {size, strokeWidth, textColor, strokeColor, position, wordsPerLine, textCase}, + '16:9': {size, strokeWidth, textColor, strokeColor, position, wordsPerLine, textCase}, }, } ``` -Auto-migrates from the legacy v1 shape (`{ style: {...} }`) if found. +Auto-migrates from legacy v1 on first read. -### Position uses a 3×3 anchor grid +**Wired:** `SubtitleModal`, `HookModal` consume `useBrandKit()`. `ResultCard.handleSubtitle` posts `words_per_line` through to `POST /api/subtitle`. 
Font upload lands in `assets/fonts/user/` (volume-mounted, persists). -Values: `top-left`, `top-center`, `top-right`, `middle-left`, `middle-center`, `middle-right`, `bottom-left`, `bottom-center`, `bottom-right`. Standard Figma pattern. +**Not yet wired:** -### Per-aspect-ratio settings +- `POST /api/hook` doesn't yet accept brand-kit colors/font; `backend/app/overlays/hooks.py:create_hook_image()` still uses bundled NotoSerif-Bold. +- AI-effect Gemini prompts in `backend/app/editing/prompts.py` are brand-kit-unaware. +- `HookModal` uses top/middle/bottom positioning — a full 9-anchor picker would respect horizontal alignment too. -**Shared across ratios:** colors + font family (brand identity). -**Per-ratio:** size, stroke width, position, words-per-line (layout-specific). +--- -Default values: -- **9:16**: size 72 px, stroke 6 px, bottom-center, 2 words/line (Hormozi style) -- **16:9**: size 48 px, stroke 4 px, bottom-center, 10 words/line (full sentences) +## 9. Processing pipeline (Clip Generator hot path) -### Live preview behavior (KEY UX) +1. **Ingest** — `backend/app/ingest/youtube.py:download_youtube_video()` or local upload (now with signature check). +2. **Transcribe** — `backend/app/ml/transcription.py:transcribe_video()` (faster-whisper, word timestamps). +3. **Scene-detect** — `backend/app/video/scene_analysis.py:detect_scenes()`. +4. **Viral extraction** — `backend/app/ml/viral_extraction.py:get_viral_clips()` (Gemini picks 3–15 clips, 15–60 s each). +5. **Cut clips** — FFmpeg `-ss`/`-to`. +6. **Strategy** — `backend/app/video/scene_analysis.py:analyze_scenes_strategy()` (TRACK vs GENERAL per scene). +7. **Reframe** — `backend/app/video/pipeline.py:process_video_to_vertical()` (per-frame loop with `SmoothedCameraman`). +8. **Effects** (optional) — `backend/app/editing/ai_filters.py:VideoEditor`. +9. **Hooks + subtitles** (optional) — `backend/app/overlays/*`. +10. **Translate** (optional) — `backend/app/integrations/elevenlabs.py:translate_video()`. 
+11. **Backup + distribute** — S3 + Upload-Post. -The preview shows **one chunk on screen at a time**, cycling every 1.5 s — exactly like real subtitle burn-in flashes one block on screen, not all of them stacked. +Observed runtime on the demo MP4 (41 s, 5.3 MB): **~39 s total** end-to-end. Per-frame reframing dominates. -Below the canvas: -- Pause/Play button -- One dot per chunk (current one highlighted wider) -- `N/total` counter -- Clicking a dot jumps to that chunk and pauses +The short-form wizard fans out one `/api/process` call per uploaded file in parallel (backend batch endpoint is TODO #1). -The preview text is editable. Word count is shown. Reset button restores default. +The long-form wizard's Processing step is **simulated** — no backend call. Chapter detection is TODO #6, segment export is TODO #7. -### Font upload flow +--- -- Drag-drop `.ttf/.otf/.woff/.woff2` into the FontPicker dropdown (10 MB cap). -- POST to `/api/fonts/upload` → saves under `assets/fonts/user/` (persistent across container restarts via volume mount). -- `GET /api/fonts` returns catalog: system (curated 9 names: Inter, Roboto, Arial, …), bundled (NotoSerif from `assets/fonts/`), user (anything in `user/`). -- `GET /api/fonts/file/{name}` serves bundled font. -- `GET /api/fonts/file/user/{name}` serves user-uploaded font. -- Browser registers them as `@font-face` via `ensureFontLoaded()` so the picker can render each name in its own typeface. +## 10. Frontend UI architecture (Phases 1–4 + smoke-test follow-ups) -### How brand kit flows into actual output +### Shell -1. **SubtitleModal** (`frontend/src/components/SubtitleModal.jsx`) uses `useBrandKit()` hook. On open, pre-fills `position`, `fontSize`, `fontName`, `fontColor`, `borderColor`, `borderWidth`, `wordsPerLine` from the live brand kit (the `'9:16'` block — all subtitle output is currently vertical). -2. **HookModal** (`frontend/src/components/HookModal.jsx`) does the same for hook overlay defaults. -3. 
On submit, `ResultCard.handleSubtitle` posts the full config to `POST /api/subtitle` including `words_per_line`. -4. Backend `SubtitleRequest` Pydantic model (in `backend/app/main.py`) accepts `words_per_line: Optional[int]`. -5. Backend threads it through to `generate_srt(transcript, …, max_words=N)` in `backend/app/overlays/subtitles_generate.py`. If `max_words` is set, SRT blocks are grouped by word count instead of character count. -6. FFmpeg subtitle burn-in renders the SRT — N words flash on screen at a time. +`<HashRouter>` mounted in `main.jsx`. Required because Landing/Legal/App selection still keys off bare `#app` / `#legal` hashes; HashRouter uses `#/path` so it doesn't collide. `resolveView()` was updated to treat `#/`-prefixed hashes as in-app — **preserve this if you ever rewrite the boot logic**. -### Live update mechanism +`layouts/AppShell.jsx` renders Sidebar (210 px) + Header (50 px) + `<Outlet/>`. Calls `useJobPolling()` once globally so the active Clip Generator job keeps polling regardless of which page is open. -`brandKit.js` exposes a `useBrandKit()` React hook that subscribes to: -- `brandKit:changed` custom event (fired on every `saveBrandKit()`) -- `storage` event (fired when localStorage changes in another tab) +### Theme tokens (`tailwind.config.js`) -This means: when the user edits the brand kit, any open SubtitleModal/HookModal sees the new values immediately. No restart needed. +```js +colors: { + background: "#0c0c0c", sidebar: "#111111", surface: "#141414", border: "#1e1e1e", + primary: "#5b5ef4", accent: "#5b5ef4", success: "#34d470", + platform: { youtube: "#f87171", tiktok: "#a5a8fd", instagram: "#f0abfc", + snapchat: "#facc15", facebook: "#1877f2" }, +} +``` -### What's currently NOT wired (deferred — section 12) +Utility classes in `index.css` (`.btn-primary`, `.input-field`, `.custom-scrollbar`) auto-recolor via token names. 
`.glass-panel` is kept for legacy components; new surfaces use `bg-surface border border-border rounded-xl`. -- **Hook overlay backend** doesn't yet accept brand kit text/stroke colors; HookModal uses categorical S/M/L sizes only. To fully respect brand kit, `POST /api/hook` would need new fields (`font_family`, `text_color`, `stroke_color`, `stroke_width`) and `app/overlays/hooks.py:create_hook_image()` would need to use them. Currently it uses the bundled NotoSerif-Bold + hardcoded styling. -- **AI effect text** (Remotion path) doesn't yet honor brand kit. Effect-config generation in Gemini prompts is unaware of the user's brand. Plumbing the brand kit into `app/editing/prompts.py` would fix this. -- **HookModal** maps brand kit position to its simpler top/middle/bottom picker — fine but lossy. A full 9-anchor picker in HookModal would respect horizontal alignment too. +### State stores (custom event pattern) ---- +Each store: module-level `_state`, custom event listener via `window.addEventListener`, optional localStorage persistence. No Context, no Zustand. **Pattern source: `lib/brandKit.js`.** -## 10. 
Tests +| Store | Holds | Key event | +| --- | --- | --- | +| `state/keysStore.js` | gemini, uploadPost, elevenLabs, fal keys + uploadUserId profile | `openshorts:keys-changed` | +| `state/jobStore.js` | active Clip Generator job (jobId, status, results, logs, syncedTime, sessionRecovered) | `openshorts:job-changed` | +| `state/notificationsStore.js` | bell feed `{ id, type, platform, status, jobId, ts, message, read }`; max 50 items | `openshorts:notifications-changed` | +| `lib/brandKit.js` | colors / font / previewText / per-aspect styles | `brandKit:changed` | -```bash -cd backend -pytest -m "not e2e" -q # 62 tests, ~0.6s -pytest -m e2e -q # slow smoke test, needs real ffmpeg + tiny fixture video -``` +### Wizard state (`hooks/useWizard.js`) -Layout: -- `backend/tests/unit/` — pure-Python, fast (5 modules) -- `backend/tests/api/test_openapi_contract.py` — pins the 35-endpoint contract via JSON snapshot -- `backend/tests/e2e/test_pipeline_smoke.py` — full pipeline end-to-end (skipped by default) +`useReducer` with actions `NEXT`, `BACK`, `GOTO`, `SET_DATA`, `RESET`, `REHYDRATE`. Steps array supports `lock: true` to disable BACK (used for Processing). Auto-persists `{ step, data }` to localStorage if `storageKey` is supplied. -**Snapshot baseline** lives at `backend/tests/snapshots/baseline.openapi.json`. If you intentionally change the API surface, delete + regenerate: -```bash -cd backend -rm tests/snapshots/baseline.openapi.json -pytest tests/api/test_openapi_contract.py # will fail on first run, dropping new current.openapi.json -cp tests/snapshots/current.openapi.json tests/snapshots/baseline.openapi.json -rm tests/snapshots/current.openapi.json -pytest # should pass now -``` +**`resetOnRehydrate(merged)` predicate** — if it returns true after a rehydrate, the wizard forces step=0 + initialData and clears persistence. 
Both wizards pass a File-presence check, so a reloaded session that lost File handles snaps back to Upload instead of stranding the user mid-flow. + +### Short-form wizard + +- **Upload** — drag-drop or browse, up to 5 files, MP4/MOV ≤ 2 GB; client-side type + size validation; HTMLVideoElement-based duration probe for ETA. +- **Categorize** — 4 category cards per clip + 4 auto-edit toggles; amber gate banner + disabled "Start processing →" when no Gemini key. +- **Processing** — one `POST /api/process` per file in parallel; reactive to `keys.gemini`, ETA on the right, specific wait-state captions (`awaiting_key` / `uploading` / `queued` / `processing` / `complete` / `error`). Falls forward as soon as the key arrives. +- **Review** — 230 px clip list + PhoneFrame preview + Before/After toggle + export bar (Download / Publish×5 / Schedule×5 / Send to CapCut). Publish/Schedule pushes notifications. + +### Long-form wizard -**conftest.py** uses `sys.modules` stubs for heavy ML deps. That means the local venv does NOT need torch/mediapipe/ultralytics installed — the tests run anywhere Python+pytest+httpx+PIL+respx are available. +- **Upload** — single MP4/MOV up to 8 GB (4K supported). +- **Settings** — 5 toggles, each annotated with its backend TODO #. +- **Processing** — **simulated 5-stage progress** (no real backend call yet, all the toggles are TODOs). Persists a history entry + seeds 3 placeholder chapters on completion. +- **Editor** — 16:9 video preview + chapter timeline scrubber + right panel tabs (Chapters / Subtitles / Export). "Export segment as short" modal documents TODO #7. + +### Dashboard + +3 StatCards (clips processed = short-form clip count + long-form edit count; scheduled = notification count with `status='scheduled'`; published = `submitted`/`published`). Upcoming uploads + Recent activity panels filter the notifications store. Live backend feed lands with TODO #10. --- ## 11. 
Running the stack -### Full stack (recommended) ```bash +# Full stack (recommended) docker compose up --build -# Frontend (open this): http://localhost:3001 -# Backend API: http://localhost:3002 -# Renderer: http://localhost:3003 -``` +# Frontend → http://localhost:3001 +# Backend → http://localhost:3002 +# Renderer → http://localhost:3003 -### Backend only (local Python 3.11) -```bash +# If react-router-dom or any other npm dep was added since the last build: +docker compose exec frontend npm install # quick fix +# OR the proper way: +docker compose down -v && docker compose up --build # clears the anonymous /app/node_modules volume + +# Backend only (local Python 3.11) cd backend pip install -r requirements.txt -r requirements-dev.txt pip install -e . uvicorn app.main:app --host 0.0.0.0 --port 3002 -``` -### Frontend only (Node 18) -```bash +# Frontend only (Node 18) cd frontend npm install -npm run dev -# Vite proxy will fail unless backend is also running -``` +npm run dev # Vite proxy will fail unless backend is also running -### Pre-commit hook -```bash +# Pre-commit hook (regenerates CLAUDE.md auto-sections; fails commit on missing docstrings) bash scripts/install_hooks.sh -# Regenerates CLAUDE.md auto-sections on every commit; fails commit if any -# app/*.py is missing a module docstring -``` -### .env setup -```bash +# .env setup cp .env.example .env -# Edit .env and set GEMINI_API_KEY (only required key) -# Other keys (ElevenLabs, fal.ai, Upload-Post) go in the dashboard UI, not .env +# Set GEMINI_API_KEY. Other keys go in the dashboard UI (Settings → API Keys), not .env. ``` --- -## 12. Deferred work / roadmap +## 12. Tests -Logged in `ROADMAP.md`. Quick summary: +```bash +cd backend +pytest -m "not e2e" -q # 61/62 (one pre-existing Pydantic drift) +pytest -m e2e -q # slow smoke test; needs real ffmpeg + fixture video +``` -### Brand Kit polish -- Wire brand kit text/stroke color into `POST /api/hook` + `app/overlays/hooks.py:create_hook_image()`. 
-- Plumb brand kit into AI-effect generation (`app/editing/prompts.py`). -- Add a full 9-anchor picker to HookModal (currently uses simpler top/middle/bottom). +Layout: -### Three feature designs (in shipping order) +- `backend/tests/unit/` — pure-Python, fast (5 modules). +- `backend/tests/api/test_openapi_contract.py` — pins the 35-endpoint contract via JSON snapshot. +- `backend/tests/e2e/test_pipeline_smoke.py` — skipped by default. -#### Feature C — Motion Graphics Library (ships first) -- New `app/motion_graphics/base.py:MotionGraphicEffect(ABC)` -- `app/motion_graphics/compositor.py:MotionGraphicsCompositor` — batches multiple effects into a single `filter_complex` so the video re-encodes once, not once per effect. -- Initial library: `LowerThirdsEffect`, `CalloutEffect`, `AnimatedEmojiEffect`, `ProgressBarEffect`. -- API: `GET /api/motion-graphics/library`, `POST /api/motion-graphics/render`. +**OpenAPI baseline regen** (only when the route surface or schema deliberately changes): -#### Feature A — Background Soundtracks + Ducking -- New `app/audio/mixer.py:mix_audio_tracks()` — uses Whisper word timings OR FFmpeg `silencedetect` to compute speech intervals; ducks music by `ducking_db` dB during speech. -- `app/audio/library.py` — reads `assets/music/manifest.json` (genre/mood/length per track). No external API in v1. -- `app/audio/cues.py` — optional Gemini-based SFX moment detection. Prompt at `app/prompts/sfx_cues.md`. -- API: `POST /api/audio/apply` with `{ job_id, clip_index, music_track_id, sfx_cues, ducking_db }`. +```bash +cd backend +rm tests/snapshots/baseline.openapi.json +pytest tests/api/test_openapi_contract.py # fails on first run, drops current.openapi.json +cp tests/snapshots/current.openapi.json tests/snapshots/baseline.openapi.json +rm tests/snapshots/current.openapi.json +pytest +``` -#### Feature B — Layout Templates (biggest) -- New `app/layouts/base.py:Layout(ABC)` with a single `render_frame(frame, detections, frame_number)` method. 
-- `VerticalPanoramaLayout` wraps current TRACK/GENERAL behavior. -- `EducationalLayout` — two cameramen: top half = source content (screencast-style), bottom half = presenter headshot. -- `SideBySideLayout`, `PictureInPictureLayout` — stubs for future expansion. -- Frame loop in `app/video/pipeline.py` becomes `layout.render_frame(...)` — polymorphic call replaces today's inline TRACK/GENERAL branching. -- Job request gains `layout: "panorama" | "educational"` (default panorama). +`conftest.py` uses `sys.modules` stubs for heavy ML deps. The local venv does NOT need torch / mediapipe / ultralytics. -### Deferred refactors -- Full router split: 2256-line `backend/app/main.py` → 11 router modules under `backend/app/routes/` + a `create_app()` factory. -- FFmpeg wrapper migration: every `subprocess.run(['ffmpeg', ...])` call → `app.video.ffmpeg`. -- SaaSShorts internal split: `backend/app/saas/pipeline.py` (1474 lines) → research/scripting/media/compositing/pipeline. -- Extract `app/core/job_store.py` + `app/core/api_keys.py` from `main.py` alongside the router split. -- Frontend restructure (App.jsx is large; centralize an api client) — deliberately out of scope this round. +**Frontend build** is the closest analog to a frontend test: ---- +```bash +cd frontend && npm run build # 1616 modules, ~1289 KB JS chunk +``` -## 13. Conventions +Currently **0 warnings**. If you reintroduce a warning, fix it before claiming done. -1. **Single FFmpeg wrapper**: `app/video/ffmpeg.py` exists but most callers still use `subprocess.run(['ffmpeg', ...])` directly. New code MUST use the wrapper; migration of existing code is incremental. -2. **API keys via headers, not env**: client-side keys (Gemini override, ElevenLabs, Upload-Post, fal.ai) arrive on each request as `X-...-Key`. A future `app/core/api_keys.py` will be the only allowed reader. -3. **Prompts as files**: new Gemini prompts go in `app/prompts/<name>.md`. 
Inline prompts may stay in `app/editing/prompts.py` for tightly-coupled cases. -4. **Every module starts with a docstring**: the pre-commit hook (`scripts/update_claude_md.py`) fails the commit if any `.py` under `backend/app/` lacks one. Single line. -5. **Tests first**: keep `pytest -m "not e2e"` 100% green. The OpenAPI snapshot in `tests/snapshots/baseline.openapi.json` pins the public surface. -6. **No new global dicts in routers**: today, `app/main.py` still owns `jobs`, `thumbnail_sessions`, `publish_jobs`, `saas_jobs`. Centralize into `app/core/job_store.py` (planned). +**Manual browser smoke test** is REQUIRED for UI changes — `npm run build` doesn't catch runtime errors. Use the chrome-devtools MCP tools to upload, click through, verify console messages, etc. Don't ship a wizard step without exercising it. --- -## 14. Known gotchas +## 13. Where to start reading -1. **Async event loop warning in tests**: `Queue dispatch error: Task ... got Future attached to a different loop` — appears once in the API contract test. Cosmetic; the test still passes (62/62). Caused by the FastAPI startup event spawning the queue worker; cleanup is missed at teardown. Not blocking. -2. **First Docker build is slow** (5–10 min): backend image is 10.7 GB because of torch + ultralytics + mediapipe. After the first build, cached layers make subsequent rebuilds quick. -3. **`RENDER_SERVICE_URL=http://renderer:3100`** is the **Docker internal** URL. Do not change to `localhost:3003` unless you're running the backend OUTSIDE Docker. The renderer container's internal port is still 3100; only the HOST mapping changed to 3003. -4. **Vite proxy uses internal Docker names** (`http://backend:8000`, `http://renderer:3100`) — these are correct as-is. -5. **`assets/fonts/user/`** is volume-mounted into the backend container for read+write. Uploaded fonts survive container restarts. -6. 
**The `_bk` module-level snapshot pattern is GONE** — both SubtitleModal and HookModal now use the `useBrandKit()` hook, which subscribes to live changes. If you re-introduce module-level reads, the modals will go stale. -7. **OpenAPI snapshot drift**: any new endpoint or Pydantic field change requires regenerating `baseline.openapi.json` (see section 10). +If you're a new agent and want to orient quickly, read in this order: + +1. **This handoff (HANDOFF.md)** — §3 (bugs), §5 (work), §6 (rules). +2. `CLAUDE.md` — auto-managed sections are current; convention rules at the bottom. +3. `frontend/src/App.jsx` (47 lines) — the route map. +4. `frontend/src/hooks/useWizard.js` — the wizard state machine driving both flows. +5. `frontend/src/hooks/useJobPolling.js` — the canonical reference for consuming `/api/status` (backend/wizard vocab mapping). +6. `frontend/src/pages/ShortForm/Wizard.jsx` + `steps/Processing.jsx` — the short-form flow end-to-end + the reactive key handling. +7. `backend/app/main.py` — FastAPI app + all routes (split is deferred). +8. `backend/app/video/pipeline.py` — the per-frame reframing loop. +9. `ROADMAP.md` — product + technical roadmap. --- -## 15. Useful commands cheatsheet +## 14. 
Useful commands cheatsheet ```bash # === Status === git log --oneline -10 docker compose ps curl http://localhost:3002/api/config # backend health +curl -s http://localhost:3002/openapi.json | jq '.paths | keys' # === Tests === -cd backend && pytest -m "not e2e" -q # fast suite -cd backend && pytest -v # everything +cd backend && pytest -m "not e2e" -q +cd frontend && npm run build # === Restart only one service === docker compose restart backend # picks up code changes via volume mount docker compose restart frontend docker compose restart renderer -# === Full rebuild === -docker compose down +# === Full rebuild (use after package.json changes) === +docker compose down -v docker compose up --build -# === Disk usage === +# === Disk === docker system df docker system prune -af # nuclear cleanup -# === Regenerate CLAUDE.md === -python3 scripts/update_claude_md.py # idempotent - -# === API surface inspection === -curl http://localhost:3002/openapi.json | jq '.paths | keys' +# === Regenerate CLAUDE.md auto-sections === +python3 scripts/update_claude_md.py -# === Revert everything === -git reset --hard pre-restructure-20260519-1526 # nuclear undo +# === Revert to pre-restructure state === +git reset --hard pre-restructure-20260519-1526 ``` --- -## 16. What to tell the next agent - -If you're handing this to a new LLM: - -1. **The codebase is in a healthy state.** Tests pass. Stack runs. -2. **The Brand Kit work is uncommitted.** Ask the user before committing. Once approved, suggested commit message: - ``` - feat(brand-kit): brand kit settings with 9-anchor positioning, per-ratio styling, font upload, and live chunk-cycling preview - ``` -3. **Don't touch the FFmpeg subprocess calls** unless you're doing the full wrapper migration. There are dozens of callers and they currently all work. -4. **The OpenAPI snapshot test will fail** the moment you add or remove any route. See section 10 for regen. -5. **Read CLAUDE.md and ROADMAP.md** — they're current. -6. 
**The user's preferred next steps**, in priority order: - a. Polish Brand Kit (wire into hooks + AI effects + HookModal full 9-anchor). - b. Ship motion graphics (feature C) — its compositor is the prerequisite for feature A. - c. Ship soundtracks + ducking (feature A). - d. Ship layout templates (feature B). -7. **Don't merge to `main` until the user explicitly approves** — the branch has 19 commits + the brand kit work; they want to review. - ---- - -*Generated for handoff. Last updated: 2026-05-19. Branch: `chore/restructure-and-docs`.* +*Generated for handoff at HEAD `7d073cb` + uncommitted Short-form Processing UX rewrite. Last updated: 2026-05-20. Branch: `chore/restructure-and-docs` (27 commits ahead of `main`, no push).* diff --git a/frontend/src/pages/ShortForm/steps/Upload.jsx b/frontend/src/pages/ShortForm/steps/Upload.jsx index 610ee0af..5c52873f 100644 --- a/frontend/src/pages/ShortForm/steps/Upload.jsx +++ b/frontend/src/pages/ShortForm/steps/Upload.jsx @@ -1,5 +1,5 @@ // Step 1: Upload. Drag-drop + browse, up to 5 files, MP4/MOV <= 2 GB. -// Each entry: { id, file (File), name, size } +// Each entry: { id, file (File), name, size, durationSec? } import { useRef, useState } from 'react'; import { FileVideo, UploadCloud, X } from 'lucide-react'; @@ -16,6 +16,31 @@ function fmtSize(bytes) { return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)} GB`; } +function fmtDuration(secs) { + if (secs < 60) return `${Math.round(secs)}s`; + const m = Math.floor(secs / 60), s = Math.round(secs % 60); + return s > 0 ? `${m}m ${s}s` : `${m}m`; +} + +// Probe video duration via a hidden HTMLVideoElement. Used by Step 3 +// (Processing) to estimate ETA. Returns null if the metadata can't be +// read (rare — non-MP4 fakes, corrupt files). 
+function probeDurationSec(file) { + return new Promise((resolve) => { + const url = URL.createObjectURL(file); + const video = document.createElement('video'); + video.preload = 'metadata'; + const cleanup = () => { URL.revokeObjectURL(url); }; + video.onloadedmetadata = () => { + const d = Number.isFinite(video.duration) ? video.duration : null; + cleanup(); + resolve(d); + }; + video.onerror = () => { cleanup(); resolve(null); }; + video.src = url; + }); +} + export default function Upload({ wizard }) { const inputRef = useRef(null); const [dragOver, setDragOver] = useState(false); @@ -35,9 +60,20 @@ export default function Upload({ wizard }) { const okType = ALLOWED_TYPES.includes(f.type) || /\.(mp4|mov)$/i.test(f.name); if (!okType) { setError(`${f.name}: only MP4 / MOV files.`); continue; } if (f.size > MAX_SIZE_BYTES) { setError(`${f.name}: over 2 GB.`); continue; } - accepted.push({ id: nextId(), file: f, name: f.name, size: f.size }); + accepted.push({ id: nextId(), file: f, name: f.name, size: f.size, durationSec: null }); + } + if (accepted.length) { + wizard.setData({ files: [...files, ...accepted] }); + // Probe durations asynchronously — Processing uses them to estimate ETA. + accepted.forEach(async (entry) => { + const d = await probeDurationSec(entry.file); + if (d == null) return; + wizard.setData((prev) => ({ + ...prev, + files: (prev.files || []).map((p) => p.id === entry.id ? 
{ ...p, durationSec: d } : p), + })); + }); } - if (accepted.length) wizard.setData({ files: [...files, ...accepted] }); } function removeFile(id) { @@ -96,7 +132,10 @@ export default function Upload({ wizard }) { <FileVideo size={18} className="text-zinc-500 shrink-0" /> <div className="flex-1 min-w-0"> <div className="text-[13px] text-white truncate">{f.name}</div> - <div className="text-[11px] text-zinc-500">{fmtSize(f.size)}</div> + <div className="text-[11px] text-zinc-500"> + {fmtSize(f.size)} + {f.durationSec != null && ` · ${fmtDuration(f.durationSec)}`} + </div> </div> <button onClick={() => removeFile(f.id)} From 1146194a1988d0b6a19e88045fbf0c7a908e49ca Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Wed, 20 May 2026 10:37:19 -0400 Subject: [PATCH 30/43] feat(short-form): auto-pipeline + reactive key gate + Brand-Kit subtitles Phase 1 of the polish work. Two layers of changes landed together because they touch the same files: Auto-pipeline (new): - POST /api/process now accepts category + 4 auto-edit toggles (auto_edit, auto_subtitles, color_grade, silence_removal) plus a subtitle_style JSON from useBrandKit(). All bounds-checked via the new SubtitleStyle pydantic model in main.py and _parse_subtitle_style; invalid input returns 400. - After the CLI subprocess produces raw reframed clips, run_job calls _run_auto_pipeline (new) which chains AI edit -> color grade (Phase 2 stub) -> silence removal (Phase 2 stub) -> subtitles per clip. Each step writes a sibling file; originals are preserved so Phase 3's per-clip Review toggles can swap URLs without re-rendering. Per-clip failures log but never fail the whole job. - Helpers moved to backend/app/editing/auto_pipeline.py so the route handlers (/api/edit, /api/subtitle) keep working unchanged for the legacy ResultCard. - status='completed' is now flipped AFTER the auto-pipeline finishes, so the wizard never navigates to Review with raw URLs mid-polish. 
- Frontend: Categorize relabels faceLayout -> autoEdit, reorders to match the backend chain. Processing.jsx hooks useBrandKit() and sends the new Form fields. Brand-kit 3x3 positions are aliased server-side to the burner's top/middle/bottom. - backend/tests/unit/test_auto_pipeline_config.py (NEW, 48 tests) pins bounds, hex-color rejection, position aliasing, bool coercion, and the category allowlist. - backend/tests/snapshots/baseline.openapi.json regenerated for the 6 new Form fields. Carried in from the prior session (intermingled in the same files): - Categorize.jsx adds an amber 'no Gemini key' gate banner + disables Start Processing when the key is missing. - Processing.jsx becomes reactive to keys.gemini (drops startedRef), surfaces ETA from probed duration, and splits Queued into awaiting_key / uploading / queued / processing / complete / error with specific captions. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- backend/app/editing/auto_pipeline.py | 140 ++++++++++ backend/app/main.py | 251 +++++++++++++++++- backend/tests/snapshots/baseline.openapi.json | 66 +++++ .../tests/unit/test_auto_pipeline_config.py | 183 +++++++++++++ .../src/pages/ShortForm/steps/Categorize.jsx | 40 ++- .../src/pages/ShortForm/steps/Processing.jsx | 199 +++++++++++--- 6 files changed, 825 insertions(+), 54 deletions(-) create mode 100644 backend/app/editing/auto_pipeline.py create mode 100644 backend/tests/unit/test_auto_pipeline_config.py diff --git a/backend/app/editing/auto_pipeline.py b/backend/app/editing/auto_pipeline.py new file mode 100644 index 00000000..caeee20c --- /dev/null +++ b/backend/app/editing/auto_pipeline.py @@ -0,0 +1,140 @@ +"""Reusable post-processing helpers called by /api/process (auto-pipeline) and per-clip routes.""" + +from __future__ import annotations + +import os +import shutil +import time +from typing import Any, Dict, Optional + +from app.editing.ai_filters import VideoEditor +from app.overlays.subtitles_generate import 
generate_srt, generate_srt_from_video +from app.overlays.subtitles_render import burn_subtitles + +OUTPUT_DIR = "output" + + +def _job_dir(job_id: str) -> str: + return os.path.join(OUTPUT_DIR, job_id) + + +def apply_ai_edit( + *, + api_key: str, + job_id: str, + input_filename: str, + transcript: Optional[Dict[str, Any]] = None, +) -> str: + """Apply Gemini-driven FFmpeg effects to a clip. + + Writes ``edited_{input_filename}`` next to the input. Idempotent: returns + the existing output when one is present and non-empty. Raises on any + Gemini / FFmpeg / cv2 failure — caller decides whether to log and skip + or to mark the whole job as failed. + """ + job_dir = _job_dir(job_id) + input_path = os.path.join(job_dir, input_filename) + edited_filename = f"edited_{input_filename}" + output_path = os.path.join(job_dir, edited_filename) + + if os.path.exists(output_path) and os.path.getsize(output_path) > 0: + return edited_filename + + editor = VideoEditor(api_key=api_key) + + # ASCII-safe temp paths mirror the per-clip /api/edit handler — avoids + # subprocess UnicodeEncodeError on filesystems with mixed encodings. 
+ stamp = int(time.time() * 1000) + safe_input_path = os.path.join(job_dir, f"temp_input_auto_{stamp}.mp4") + shutil.copy(input_path, safe_input_path) + + try: + vid_file = editor.upload_video(safe_input_path) + + import cv2 # local import keeps module-load cheap + + cap = cv2.VideoCapture(safe_input_path) + fps = cap.get(cv2.CAP_PROP_FPS) + frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + duration = frame_count / fps if fps else 0 + cap.release() + + filter_data = editor.get_ffmpeg_filter( + vid_file, duration, + fps=fps, width=width, height=height, + transcript=transcript, + ) + + safe_output = os.path.join(job_dir, f"temp_output_auto_{stamp}.mp4") + editor.apply_edits(safe_input_path, safe_output, filter_data) + + if os.path.exists(safe_output): + shutil.move(safe_output, output_path) + finally: + if os.path.exists(safe_input_path): + os.remove(safe_input_path) + + return edited_filename + + +def apply_subtitles( + *, + job_id: str, + clip_index: int, + input_filename: str, + transcript: Dict[str, Any], + clip_start: float, + clip_end: float, + style: Dict[str, Any], +) -> str: + """Burn subtitles onto a clip with brand-kit styling. + + Writes ``subtitled_{input_filename}``. Idempotent — returns the existing + output when present and non-empty. + + ``style`` keys (all optional, defaults applied): ``position``, + ``font_size``, ``font_name``, ``font_color``, ``border_color``, + ``border_width``, ``bg_color``, ``bg_opacity``, ``words_per_line``, + ``text_case``. Mirrors the SubtitleRequest schema in main.py. 
+ """ + job_dir = _job_dir(job_id) + input_path = os.path.join(job_dir, input_filename) + output_filename = f"subtitled_{input_filename}" + output_path = os.path.join(job_dir, output_filename) + + if os.path.exists(output_path) and os.path.getsize(output_path) > 0: + return output_filename + + srt_path = os.path.join(job_dir, f"subs_{clip_index}_{int(time.time())}.srt") + + max_words = style.get('words_per_line') + if max_words is not None and max_words <= 0: + max_words = None + text_case = style.get('text_case') or 'original' + + # Mirror /api/subtitle: dubbed clips need fresh transcription because + # the original transcript no longer matches the audio track. + is_dubbed = input_filename.startswith("translated_") + if is_dubbed: + success = generate_srt_from_video(input_path, srt_path, max_words=max_words, text_case=text_case) + else: + success = generate_srt(transcript, clip_start, clip_end, srt_path, max_words=max_words, text_case=text_case) + + if not success: + raise RuntimeError(f"No words found for clip range [{clip_start}, {clip_end}]") + + burn_subtitles( + input_path, srt_path, output_path, + alignment=style.get('position', 'bottom'), + fontsize=int(style.get('font_size', 16)), + font_name=style.get('font_name', 'Verdana'), + font_color=style.get('font_color', '#FFFFFF'), + border_color=style.get('border_color', '#000000'), + border_width=int(style.get('border_width', 2)), + bg_color=style.get('bg_color', '#000000'), + bg_opacity=float(style.get('bg_opacity', 0.0)), + ) + + return output_filename diff --git a/backend/app/main.py b/backend/app/main.py index 729b538e..f25375da 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -8,14 +8,15 @@ import glob import time import asyncio +from functools import partial from dotenv import load_dotenv -from typing import Dict, Optional, List +from typing import Any, Dict, Optional, List from contextlib import asynccontextmanager from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request, 
Header, BackgroundTasks from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles from fastapi.responses import HTMLResponse -from pydantic import BaseModel +from pydantic import BaseModel, Field, ValidationError, field_validator from app.integrations.s3 import upload_job_artifacts, list_all_clips, upload_actor_to_s3, list_actor_gallery, upload_video_to_gallery, list_video_gallery load_dotenv() @@ -187,6 +188,97 @@ async def lifespan(app: FastAPI): os.makedirs(THUMBNAILS_DIR, exist_ok=True) app.mount("/thumbnails", StaticFiles(directory=THUMBNAILS_DIR), name="thumbnails") +# --------------------------------------------------------------------------- +# Short-form auto-pipeline config (Phase 1 — see ~/.claude/plans/...-cray.md) +# +# When POST /api/process arrives with auto_subtitles / auto_edit / color_grade +# / silence_removal toggles, run_job dispatches the post-processing chain +# AFTER the CLI subprocess produces the raw reframed clips. Each enabled step +# writes a sibling file (originals are preserved) and the polished URL is +# what the wizard surfaces in Review. The full chain (Phase 2 wires the last +# two) is: AI edit → color grade → silence removal → subtitles. +# --------------------------------------------------------------------------- + +_AUTO_ALLOWED_CATEGORIES = {"educational", "yap", "live", "viral"} +_AUTO_ALLOWED_POSITIONS = {"top", "middle", "bottom"} +_AUTO_ALLOWED_TEXT_CASES = {"original", "upper", "lower"} +# Brand-kit positions use a 3x3 grid (bottom-center, top-left, ...). The +# subtitle burner only knows top/middle/bottom — alias the grid down to that. +_POSITION_ALIASES = { + "top-left": "top", "top-center": "top", "top-right": "top", + "middle-left": "middle", "middle-center": "middle", "middle-right": "middle", + "bottom-left": "bottom", "bottom-center": "bottom", "bottom-right": "bottom", +} + + +class SubtitleStyle(BaseModel): + """Bounds-checked subtitle styling. 
Built from useBrandKit() JSON on the wire.""" + + position: str = "bottom" + font_size: int = Field(default=16, ge=8, le=120) + font_name: str = "Verdana" + font_color: str = "#FFFFFF" + border_color: str = "#000000" + border_width: int = Field(default=2, ge=0, le=20) + bg_color: str = "#000000" + bg_opacity: float = Field(default=0.0, ge=0.0, le=1.0) + words_per_line: Optional[int] = Field(default=None, ge=0, le=20) + text_case: Optional[str] = None + + @field_validator("position", mode="before") + @classmethod + def _normalize_position(cls, v: Any) -> str: + s = str(v or "bottom").strip().lower() + s = _POSITION_ALIASES.get(s, s) + if s not in _AUTO_ALLOWED_POSITIONS: + raise ValueError(f"position must be one of {sorted(_AUTO_ALLOWED_POSITIONS)} or a 3x3 grid alias") + return s + + @field_validator("text_case", mode="before") + @classmethod + def _normalize_text_case(cls, v: Any) -> Optional[str]: + if v is None or v == "": + return None + s = str(v).strip().lower() + if s not in _AUTO_ALLOWED_TEXT_CASES: + raise ValueError(f"text_case must be one of {sorted(_AUTO_ALLOWED_TEXT_CASES)}") + return s + + @field_validator("font_color", "border_color", "bg_color", mode="before") + @classmethod + def _validate_hex_color(cls, v: Any) -> str: + if not isinstance(v, str) or len(v) != 7 or not v.startswith("#"): + raise ValueError("color must be #RRGGBB hex string") + try: + int(v[1:], 16) + except ValueError as exc: + raise ValueError("color must be #RRGGBB hex string") from exc + return v.upper() + + +def _normalize_bool_form(value: Optional[str]) -> bool: + """Coerce a multipart-form string to bool. ``None`` / unset reads as False.""" + if value is None: + return False + return str(value).strip().lower() in ("1", "true", "yes", "on") + + +def _parse_subtitle_style(raw: Optional[str]) -> Dict[str, Any]: + """Validate the brand-kit subtitle JSON. 
Returns ``{}`` when unset.""" + if not raw: + return {} + try: + data = json.loads(raw) + except (json.JSONDecodeError, TypeError) as exc: + raise HTTPException(status_code=400, detail=f"subtitle_style must be valid JSON: {exc}") + if not isinstance(data, dict): + raise HTTPException(status_code=400, detail="subtitle_style must be a JSON object") + try: + return SubtitleStyle.model_validate(data).model_dump() + except ValidationError as exc: + raise HTTPException(status_code=400, detail=f"subtitle_style validation failed: {exc.errors()}") + + class ProcessRequest(BaseModel): url: str @@ -271,13 +363,12 @@ async def run_job(job_id, job_data): returncode = process.returncode if returncode == 0: - jobs[job_id]['status'] = 'completed' jobs[job_id]['logs'].append("Process finished successfully.") - + # Start S3 upload in background (silent, non-blocking) loop = asyncio.get_event_loop() loop.run_in_executor(None, upload_job_artifacts, output_dir, job_id) - + # Find result JSON json_files = glob.glob(os.path.join(output_dir, "*_metadata.json")) if not json_files: @@ -285,10 +376,10 @@ async def run_job(job_id, job_data): if _relocate_root_job_artifacts(job_id, output_dir): json_files = glob.glob(os.path.join(output_dir, "*_metadata.json")) if json_files: - target_json = json_files[0] + target_json = json_files[0] with open(target_json, 'r') as f: data = json.load(f) - + # Enhance result with video URLs base_name = os.path.basename(target_json).replace('_metadata.json', '') clips = data.get('shorts', []) @@ -297,8 +388,19 @@ async def run_job(job_id, job_data): for i, clip in enumerate(clips): clip_filename = f"{base_name}_clip_{i+1}.mp4" clip['video_url'] = f"/videos/{job_id}/{clip_filename}" - + jobs[job_id]['result'] = {'clips': clips, 'cost_analysis': cost_analysis} + + # Post-processing chain (subtitles, AI effects, ...). Failures + # here log but do NOT fail the whole job — a raw clip is better + # than no clip. 
Status stays 'processing' until this returns so + # the wizard doesn't navigate to Review with raw URLs. + try: + await _run_auto_pipeline(job_id, target_json, job_data.get('env', {}), jobs[job_id].get('auto', {})) + except Exception as exc: + jobs[job_id]['logs'].append(f"⚠️ auto-pipeline error: {exc}") + + jobs[job_id]['status'] = 'completed' else: jobs[job_id]['status'] = 'failed' jobs[job_id]['logs'].append("No metadata file generated.") @@ -310,6 +412,110 @@ async def run_job(job_id, job_data): jobs[job_id]['status'] = 'failed' jobs[job_id]['logs'].append(f"Execution error: {str(e)}") + +async def _run_auto_pipeline( + job_id: str, + metadata_path: str, + env: Dict[str, str], + auto_config: Dict[str, Any], +) -> None: + """Apply the per-clip post-processing chain configured in /api/process. + + Order: AI edit → color grade (Phase 2) → silence removal (Phase 2) → subtitles. + Each step writes a sibling file; the original ``_clip_N.mp4`` is preserved + so Review's per-clip toggles (Phase 3) can swap URLs without re-rendering. + + Per-clip failures append to ``jobs[job_id]['logs']`` and do NOT mark the + job as failed — a polished clip missing one effect is better than nothing. + """ + if not auto_config: + return + if not any(auto_config.get(k) for k in ("auto_edit", "auto_subtitles", "color_grade", "silence_removal")): + return + + # Lazy import keeps cold-start cheap when no auto-pipeline is requested. 
+ from app.editing.auto_pipeline import apply_ai_edit, apply_subtitles + + try: + with open(metadata_path, 'r') as f: + metadata = json.load(f) + except Exception as exc: + jobs[job_id]['logs'].append(f"⚠️ auto-pipeline: cannot read metadata: {exc}") + return + + transcript = metadata.get('transcript') + api_key = env.get('GEMINI_API_KEY') or os.environ.get('GEMINI_API_KEY') + style = auto_config.get('subtitle_style') or {} + + loop = asyncio.get_event_loop() + clips = jobs[job_id]['result']['clips'] + + for i, clip in enumerate(clips): + original_filename = clip['video_url'].split('/')[-1] + current_filename = original_filename + variants: Dict[str, Optional[str]] = { + 'original': original_filename, + 'edited': None, + 'graded': None, # Phase 2 + 'silencecut': None, # Phase 2 + 'subtitled': None, + } + + if auto_config.get('auto_edit') and api_key: + try: + edited = await loop.run_in_executor( + None, + partial( + apply_ai_edit, + api_key=api_key, + job_id=job_id, + input_filename=current_filename, + transcript=transcript, + ), + ) + variants['edited'] = edited + current_filename = edited + jobs[job_id]['logs'].append(f"🎬 auto AI-edit applied to clip {i + 1}") + except Exception as exc: + jobs[job_id]['logs'].append(f"⚠️ auto AI-edit failed for clip {i + 1}: {exc}") + + # color_grade — Phase 2 + # silence_removal — Phase 2 + + if auto_config.get('auto_subtitles') and transcript: + try: + subtitled = await loop.run_in_executor( + None, + partial( + apply_subtitles, + job_id=job_id, + clip_index=i, + input_filename=current_filename, + transcript=transcript, + clip_start=clip.get('start', 0), + clip_end=clip.get('end', 0), + style=style, + ), + ) + variants['subtitled'] = subtitled + current_filename = subtitled + jobs[job_id]['logs'].append(f"✍️ auto subtitles burned on clip {i + 1}") + except Exception as exc: + jobs[job_id]['logs'].append(f"⚠️ auto subtitles failed for clip {i + 1}: {exc}") + + variants['polished'] = current_filename + clip['variants'] = variants 
+ clip['video_url'] = f"/videos/{job_id}/{current_filename}" + + # Persist variants to disk so Review (Phase 3) can read them after a reload. + metadata['shorts'] = clips + try: + with open(metadata_path, 'w') as f: + json.dump(metadata, f, indent=4) + except Exception as exc: + jobs[job_id]['logs'].append(f"⚠️ auto-pipeline: failed to persist variants: {exc}") + + _ALLOWED_VIDEO_EXTS = {".mp4", ".mov"} # MP4 / MOV files start with a 'ftyp' box at byte offset 4. # Reference: ISO/IEC 14496-12 (MP4 container spec). @@ -345,7 +551,14 @@ async def process_endpoint( request: Request, file: Optional[UploadFile] = File(None), url: Optional[str] = Form(None), - acknowledged: Optional[str] = Form(None) + acknowledged: Optional[str] = Form(None), + # Phase 1 auto-pipeline fields — see _run_auto_pipeline below. + category: Optional[str] = Form(None), + auto_subtitles: Optional[str] = Form(None), + auto_edit: Optional[str] = Form(None), + color_grade: Optional[str] = Form(None), # honored in Phase 2 + silence_removal: Optional[str] = Form(None), # honored in Phase 2 + subtitle_style: Optional[str] = Form(None), # JSON string from useBrandKit() ): api_key = request.headers.get("X-Gemini-Key") if not api_key: @@ -383,6 +596,23 @@ async def process_endpoint( "source": "url" if url else "file", } + # Validate + freeze the auto-pipeline config used by run_job after the + # CLI subprocess completes. Raises 400 on out-of-bounds input. + cat_raw = (category or "").strip().lower() or None + if cat_raw is not None and cat_raw not in _AUTO_ALLOWED_CATEGORIES: + raise HTTPException( + status_code=400, + detail=f"Invalid category {cat_raw!r}. 
Allowed: {sorted(_AUTO_ALLOWED_CATEGORIES)}", + ) + auto_config = { + "category": cat_raw, + "auto_edit": _normalize_bool_form(auto_edit), + "auto_subtitles": _normalize_bool_form(auto_subtitles), + "color_grade": _normalize_bool_form(color_grade), + "silence_removal": _normalize_bool_form(silence_removal), + "subtitle_style": _parse_subtitle_style(subtitle_style), + } + job_id = str(uuid.uuid4()) job_output_dir = os.path.join(OUTPUT_DIR, job_id) os.makedirs(job_output_dir, exist_ok=True) @@ -436,7 +666,8 @@ async def process_endpoint( 'cmd': cmd, 'env': env, 'output_dir': job_output_dir, - 'attestation': attestation + 'attestation': attestation, + 'auto': auto_config, } await job_queue.put(job_id) diff --git a/backend/tests/snapshots/baseline.openapi.json b/backend/tests/snapshots/baseline.openapi.json index 736b16b7..9f7fcd85 100644 --- a/backend/tests/snapshots/baseline.openapi.json +++ b/backend/tests/snapshots/baseline.openapi.json @@ -14,6 +14,50 @@ ], "title": "Acknowledged" }, + "auto_edit": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Auto Edit" + }, + "auto_subtitles": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Auto Subtitles" + }, + "category": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Category" + }, + "color_grade": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Color Grade" + }, "file": { "anyOf": [ { @@ -26,6 +70,28 @@ ], "title": "File" }, + "silence_removal": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Silence Removal" + }, + "subtitle_style": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Subtitle Style" + }, "url": { "anyOf": [ { diff --git a/backend/tests/unit/test_auto_pipeline_config.py b/backend/tests/unit/test_auto_pipeline_config.py new file mode 100644 index 00000000..fea3205e --- /dev/null +++ 
b/backend/tests/unit/test_auto_pipeline_config.py @@ -0,0 +1,183 @@ +"""Tests for the short-form auto-pipeline config parsers in app.main. + +These pin SubtitleStyle bounds, _parse_subtitle_style error paths, and +_normalize_bool_form coercion. The pipeline itself (apply_ai_edit / +apply_subtitles) is exercised by Phase 5 e2e tests. +""" +from __future__ import annotations + +import json + +import pytest +from fastapi import HTTPException +from pydantic import ValidationError + +from app.main import ( + SubtitleStyle, + _AUTO_ALLOWED_CATEGORIES, + _normalize_bool_form, + _parse_subtitle_style, +) + + +# --- SubtitleStyle ---------------------------------------------------------- + +def test_subtitle_style_defaults_are_valid(): + s = SubtitleStyle.model_validate({}) + assert s.position == "bottom" + assert s.font_size == 16 + assert s.font_name == "Verdana" + assert s.font_color == "#FFFFFF" + assert s.bg_opacity == 0.0 + assert s.words_per_line is None + assert s.text_case is None + + +@pytest.mark.parametrize( + "grid, expected", + [ + ("bottom-center", "bottom"), + ("TOP-LEFT", "top"), + ("middle-right", "middle"), + ("bottom", "bottom"), + ], +) +def test_subtitle_style_aliases_brand_kit_grid(grid, expected): + s = SubtitleStyle.model_validate({"position": grid}) + assert s.position == expected + + +def test_subtitle_style_rejects_unknown_position(): + with pytest.raises(ValidationError): + SubtitleStyle.model_validate({"position": "centre-of-mass"}) + + +@pytest.mark.parametrize( + "case, expected", + [ + ("upper", "upper"), + ("LOWER", "lower"), + ("original", "original"), + ("", None), + (None, None), + ], +) +def test_subtitle_style_text_case_normalization(case, expected): + s = SubtitleStyle.model_validate({"text_case": case}) + assert s.text_case == expected + + +def test_subtitle_style_rejects_invalid_text_case(): + with pytest.raises(ValidationError): + SubtitleStyle.model_validate({"text_case": "title"}) + + +@pytest.mark.parametrize( + "field, bad", + 
[ + ("font_size", 7), # below ge=8 + ("font_size", 121), # above le=120 + ("border_width", -1), + ("border_width", 21), + ("bg_opacity", -0.1), + ("bg_opacity", 1.01), + ("words_per_line", -1), + ("words_per_line", 21), + ], +) +def test_subtitle_style_numeric_bounds(field, bad): + with pytest.raises(ValidationError): + SubtitleStyle.model_validate({field: bad}) + + +@pytest.mark.parametrize( + "hex_value", + ["#fff", "FFFFFF", "#1234567", "rgb(0,0,0)", "#ZZZZZZ", "", None, 123], +) +def test_subtitle_style_rejects_bad_hex_color(hex_value): + with pytest.raises(ValidationError): + SubtitleStyle.model_validate({"font_color": hex_value}) + + +def test_subtitle_style_hex_color_uppercased(): + s = SubtitleStyle.model_validate({"font_color": "#abcdef"}) + assert s.font_color == "#ABCDEF" + + +# --- _normalize_bool_form --------------------------------------------------- + +@pytest.mark.parametrize( + "raw, expected", + [ + ("true", True), + ("True", True), + ("TRUE", True), + ("1", True), + ("yes", True), + ("on", True), + ("false", False), + ("0", False), + ("no", False), + ("", False), + (None, False), + ("anything-else", False), + ], +) +def test_normalize_bool_form(raw, expected): + assert _normalize_bool_form(raw) is expected + + +# --- _parse_subtitle_style -------------------------------------------------- + +def test_parse_subtitle_style_empty_returns_empty_dict(): + assert _parse_subtitle_style(None) == {} + assert _parse_subtitle_style("") == {} + + +def test_parse_subtitle_style_round_trip_defaults(): + out = _parse_subtitle_style("{}") + assert out["position"] == "bottom" + assert out["font_size"] == 16 + + +def test_parse_subtitle_style_applies_validation_and_aliasing(): + payload = json.dumps({ + "position": "bottom-center", + "font_size": 72, + "font_color": "#ffffff", + "border_color": "#000000", + "border_width": 6, + "words_per_line": 2, + "text_case": "upper", + }) + out = _parse_subtitle_style(payload) + assert out["position"] == "bottom" + assert 
out["font_size"] == 72 + assert out["font_color"] == "#FFFFFF" + assert out["text_case"] == "upper" + + +def test_parse_subtitle_style_rejects_non_json(): + with pytest.raises(HTTPException) as exc: + _parse_subtitle_style("not-json-at-all") + assert exc.value.status_code == 400 + + +def test_parse_subtitle_style_rejects_non_object(): + with pytest.raises(HTTPException) as exc: + _parse_subtitle_style("[1, 2, 3]") + assert exc.value.status_code == 400 + + +def test_parse_subtitle_style_rejects_out_of_bounds(): + payload = json.dumps({"font_size": 500}) + with pytest.raises(HTTPException) as exc: + _parse_subtitle_style(payload) + assert exc.value.status_code == 400 + + +# --- _AUTO_ALLOWED_CATEGORIES sanity check --------------------------------- + +def test_categories_allowlist_matches_frontend_contract(): + # Matches the four cards in frontend/src/pages/ShortForm/steps/Categorize.jsx. + assert _AUTO_ALLOWED_CATEGORIES == {"educational", "yap", "live", "viral"} diff --git a/frontend/src/pages/ShortForm/steps/Categorize.jsx b/frontend/src/pages/ShortForm/steps/Categorize.jsx index 7aeb938d..94804ff5 100644 --- a/frontend/src/pages/ShortForm/steps/Categorize.jsx +++ b/frontend/src/pages/ShortForm/steps/Categorize.jsx @@ -5,7 +5,9 @@ // for new clips and let the user override per clip. import { useEffect } from 'react'; -import { GraduationCap, Mic, Sparkles, Tv } from 'lucide-react'; +import { GraduationCap, Mic, Sparkles, Tv, KeyRound } from 'lucide-react'; +import { Link } from 'react-router-dom'; +import { useKeys } from '../../../state/keysStore.js'; const CATEGORIES = [ { @@ -34,16 +36,19 @@ const CATEGORIES = [ }, ]; +// Order matches the run_job auto-pipeline chain: AI edit → grade → silence → subtitles. const TOGGLES = [ - { id: 'colorGrade', label: 'Color grade', hint: 'Apply a cinematic LUT (backend TODO #5).' }, - { id: 'autoSubtitles', label: 'Auto subtitles', hint: 'Transcribe + burn captions with brand-kit style.' 
}, - { id: 'silenceRemoval', label: 'Silence removal', hint: 'Auto-cut dead air (backend TODO #4).' }, - { id: 'faceLayout', label: 'Face-focus layout', hint: 'Lock crop to detected speakers.' }, + { id: 'autoEdit', label: 'AI edits (zoom / pan / cuts)', hint: 'Gemini picks zoompan + cut points per clip.' }, + { id: 'colorGrade', label: 'Color grade', hint: 'Apply a cinematic LUT (lands with /api/colorgrade in Phase 2).' }, + { id: 'silenceRemoval', label: 'Silence removal', hint: 'Auto-cut dead air (lands with /api/silencecut in Phase 2).' }, + { id: 'autoSubtitles', label: 'Auto subtitles', hint: 'Transcribe + burn captions with brand-kit style.' }, ]; export default function Categorize({ wizard }) { const files = wizard.data.files || []; const settings = wizard.data.settings || {}; + const keys = useKeys(); + const hasGeminiKey = !!keys.gemini; // TODO(backend): plan TODO #2 — replace this pre-fill with POST /api/categorize // on the file's transcript or thumbnail. @@ -126,11 +131,34 @@ export default function Categorize({ wizard }) { </div> </section> + {!hasGeminiKey && ( + <div className="rounded-xl border border-amber-500/30 bg-amber-500/10 p-4 flex items-start gap-3"> + <KeyRound size={16} className="text-amber-400 shrink-0 mt-0.5" /> + <div className="flex-1"> + <div className="text-[13px] text-amber-200 font-medium">Gemini API key required</div> + <div className="text-[12px] text-amber-200/80 mt-0.5"> + Processing needs a Gemini key to identify viral moments. Add one before continuing. 
+ </div> + </div> + <Link + to="/settings/system/api-keys" + className="text-[12px] text-amber-200 hover:text-white underline shrink-0" + > + Open Settings → + </Link> + </div> + )} + <div className="flex items-center justify-between pt-4 border-t border-border"> <button onClick={wizard.back} className="text-[13px] text-zinc-400 hover:text-white transition-colors"> ← Back </button> - <button onClick={wizard.next} className="btn-primary px-5 py-2 text-[13px]"> + <button + onClick={wizard.next} + disabled={!hasGeminiKey} + title={!hasGeminiKey ? 'Set your Gemini API key in Settings first.' : ''} + className="btn-primary px-5 py-2 text-[13px] disabled:opacity-40 disabled:cursor-not-allowed" + > Start processing → </button> </div> diff --git a/frontend/src/pages/ShortForm/steps/Processing.jsx b/frontend/src/pages/ShortForm/steps/Processing.jsx index 5bd1bf90..95e83169 100644 --- a/frontend/src/pages/ShortForm/steps/Processing.jsx +++ b/frontend/src/pages/ShortForm/steps/Processing.jsx @@ -9,18 +9,50 @@ // /api/process/batch returning a list of job ids. import { useEffect, useRef, useState } from 'react'; -import { CheckCircle2, Loader2, XCircle } from 'lucide-react'; +import { CheckCircle2, Loader2, XCircle, KeyRound } from 'lucide-react'; +import { Link } from 'react-router-dom'; import { getApiUrl } from '../../../config'; import { useKeys } from '../../../state/keysStore.js'; +import { useBrandKit } from '../../../lib/brandKit.js'; import SnakeGame from '../../../components/ui/SnakeGame.jsx'; const POLL_MS = 2000; const HISTORY_KEY = 'openshorts.shortForm.history'; -async function startJob({ file, geminiKey, signal }) { +// Processing time ≈ source duration × 1.2 on CPU (transcription + per-frame +// reframing dominate). Refined as logs arrive. Treat as a hint, not a SLA. 
+const ETA_MULTIPLIER = 1.2; +const ETA_FLOOR_SEC = 25; // small clips still pay fixed-cost setup +const ETA_FALLBACK_SEC = 60; // when we couldn't probe duration + +// Build the SubtitleStyle JSON the backend's SubtitleStyle pydantic model +// validates against (see backend/app/main.py). Brand-kit positions are 3x3 +// grid strings (bottom-center, top-left, ...); the backend aliases those +// down to its top/middle/bottom enum. +function buildSubtitleStyle(brandKit) { + const style = brandKit?.styles?.['9:16'] || {}; + return { + position: style.position ?? 'bottom-center', + font_size: style.size ?? 16, + font_name: brandKit?.font?.family ?? 'Verdana', + font_color: style.textColor ?? '#FFFFFF', + border_color: style.strokeColor ?? '#000000', + border_width: style.strokeWidth ?? 2, + words_per_line: style.wordsPerLine ?? null, + text_case: style.textCase ?? null, + }; +} + +async function startJob({ file, geminiKey, category, settings, subtitleStyle, signal }) { const formData = new FormData(); formData.append('file', file); formData.append('acknowledged', 'true'); + if (category) formData.append('category', category); + formData.append('auto_edit', settings.autoEdit ? 'true' : 'false'); + formData.append('auto_subtitles', settings.autoSubtitles ? 'true' : 'false'); + formData.append('color_grade', settings.colorGrade ? 'true' : 'false'); + formData.append('silence_removal', settings.silenceRemoval ? 'true' : 'false'); + formData.append('subtitle_style', JSON.stringify(subtitleStyle)); const res = await fetch(getApiUrl('/api/process'), { method: 'POST', headers: { 'X-Gemini-Key': geminiKey }, @@ -58,48 +90,82 @@ function saveHistory(entry) { } catch {/* ignore */} } +function fmtRemaining(secs) { + if (secs == null || !Number.isFinite(secs)) return ''; + const s = Math.max(1, Math.round(secs)); + if (s < 60) return `~${s}s left`; + const m = Math.floor(s / 60), rem = s % 60; + return rem > 0 ? 
`~${m}m ${rem}s left` : `~${m}m left`; +} + +function estimatedTotalSec(file) { + if (file?.durationSec && Number.isFinite(file.durationSec)) { + return Math.max(ETA_FLOOR_SEC, file.durationSec * ETA_MULTIPLIER); + } + return ETA_FALLBACK_SEC; +} + export default function Processing({ wizard }) { const keys = useKeys(); + const brandKit = useBrandKit(); const files = wizard.data.files || []; + const settings = wizard.data.settings || {}; const jobs = wizard.data.jobs || {}; - const startedRef = useRef(false); + const subtitleStyle = buildSubtitleStyle(brandKit); const historySavedRef = useRef(false); - const [overallStatus, setOverallStatus] = useState('starting'); + // Track which files we're already submitting so a re-run of the init + // effect (e.g. when keys.gemini arrives later) doesn't double-fire + // startJob. Lives in a ref because we don't want re-renders for it. + const submittingRef = useRef(new Set()); + // Local "uploading" state so the row says "Uploading…" between the user + // hitting Start processing and the POST returning a job_id. + const [uploadingIds, setUploadingIds] = useState(new Set()); + // Tick once a second so the ETA countdown updates smoothly between polls. + const [, setNowTick] = useState(0); - // Kick off jobs once on first mount. + // Kick off jobs whenever the Gemini key is present. Re-runs when the + // key arrives later — the inside-loop guards (`submittingRef`, existing + // jobId) keep it idempotent. No startedRef gate, so users who land here + // before setting a key can recover by setting it from another tab/page. useEffect(() => { - if (startedRef.current) return; - startedRef.current = true; - if (!keys.gemini) { - setOverallStatus('error'); - return; - } - setOverallStatus('running'); + if (!keys.gemini) return; files.forEach(async (f) => { if (jobs[f.id]?.jobId) return; - // File object may not exist after a wizard rehydrate. 
+ if (submittingRef.current.has(f.id)) return; + if (jobs[f.id]?.status === 'error') return; if (!(f.file instanceof File)) { wizard.setData((prev) => ({ ...prev, - jobs: { ...prev.jobs, [f.id]: { jobId: null, status: 'error', logs: ['Source file lost — re-upload to retry.'], result: null } }, + jobs: { ...prev.jobs, [f.id]: { jobId: null, status: 'error', logs: ['Source file lost — re-upload to retry.'], result: null, startedAt: null } }, })); return; } + submittingRef.current.add(f.id); + setUploadingIds((s) => new Set(s).add(f.id)); try { - const { job_id } = await startJob({ file: f.file, geminiKey: keys.gemini }); + const { job_id } = await startJob({ + file: f.file, + geminiKey: keys.gemini, + category: f.category, + settings, + subtitleStyle, + }); wizard.setData((prev) => ({ ...prev, - jobs: { ...prev.jobs, [f.id]: { jobId: job_id, status: 'processing', logs: [], result: null } }, + jobs: { ...prev.jobs, [f.id]: { jobId: job_id, status: 'processing', logs: [], result: null, startedAt: Date.now() } }, })); } catch (e) { wizard.setData((prev) => ({ ...prev, - jobs: { ...prev.jobs, [f.id]: { jobId: null, status: 'error', logs: [String(e.message || e)], result: null } }, + jobs: { ...prev.jobs, [f.id]: { jobId: null, status: 'error', logs: [String(e.message || e)], result: null, startedAt: null } }, })); + } finally { + submittingRef.current.delete(f.id); + setUploadingIds((s) => { const n = new Set(s); n.delete(f.id); return n; }); } }); // eslint-disable-next-line react-hooks/exhaustive-deps - }, []); + }, [keys.gemini]); // Poll status for every still-running job. Cleanup aborts in-flight // fetches and a `cancelled` flag stops late responses from committing @@ -159,13 +225,20 @@ export default function Processing({ wizard }) { // eslint-disable-next-line react-hooks/exhaustive-deps }, [Object.values(jobs).map((j) => j.status).join(',')]); + // Tick the ETA countdown every second while any job is in flight. 
+ useEffect(() => { + const anyInFlight = uploadingIds.size > 0 || Object.values(jobs).some((j) => j.status === 'processing'); + if (!anyInFlight) return; + const id = setInterval(() => setNowTick((n) => n + 1), 1000); + return () => clearInterval(id); + }, [uploadingIds, jobs]); + // Detect all-done + persist a history entry once. useEffect(() => { const entries = Object.values(jobs); if (entries.length < files.length) return; const done = entries.every((j) => j.status === 'complete' || j.status === 'error'); if (!done) return; - setOverallStatus('complete'); if (historySavedRef.current) return; historySavedRef.current = true; saveHistory({ @@ -177,7 +250,14 @@ export default function Processing({ wizard }) { }); }, [jobs, files.length]); - const hasAnyComplete = Object.values(jobs).some((j) => j.status === 'complete'); + // Derived footer state. Reactive to keys.gemini and per-file outcomes, + // not a sticky setOverallStatus from the init effect. + const entries = Object.values(jobs); + const allTerminal = files.length > 0 + && entries.length >= files.length + && entries.every((j) => j.status === 'complete' || j.status === 'error'); + const anyTerminal = entries.some((j) => j.status === 'complete' || j.status === 'error'); + const hasAnyComplete = entries.some((j) => j.status === 'complete'); return ( <div className="h-full overflow-y-auto custom-scrollbar"> @@ -192,22 +272,53 @@ export default function Processing({ wizard }) { <div className="space-y-2"> {files.map((f) => { const j = jobs[f.id]; - const status = j?.status || 'queued'; + const isUploading = uploadingIds.has(f.id); + const status = isUploading + ? 'uploading' + : (j?.status || (keys.gemini ? 'queued' : 'awaiting_key')); const lastLog = j?.logs?.[j.logs.length - 1]; + + // Status caption: collapse the multiple wait states into + // specific user-readable text. + const caption = + status === 'awaiting_key' ? 'Waiting for Gemini key…' : + status === 'uploading' ? 
'Uploading to backend…' : + status === 'queued' ? 'Queued for the next slot…' : + status === 'processing' ? (lastLog || 'Starting pipeline…') : + status === 'complete' ? `Generated ${j.result?.clips?.length || 0} clip${(j.result?.clips?.length || 0) === 1 ? '' : 's'}` : + status === 'error' ? (lastLog || 'Failed') : + 'Working…'; + + // ETA: only meaningful while the job is actually running on + // the backend (we know startedAt). Before submission we just + // show the estimated total. + const total = estimatedTotalSec(f); + let remaining = null; + if (status === 'processing' && j?.startedAt) { + const elapsed = (Date.now() - j.startedAt) / 1000; + remaining = total - elapsed; + } else if (status === 'uploading' || status === 'queued') { + remaining = total; + } + const showEta = remaining != null && remaining > 0 + && status !== 'complete' && status !== 'error'; + const overdue = status === 'processing' && j?.startedAt + && (Date.now() - j.startedAt) / 1000 > total + 10; + return ( <div key={f.id} className="rounded-lg border border-border bg-surface p-3"> <div className="flex items-center gap-3"> <StatusIcon status={status} /> <div className="flex-1 min-w-0"> <div className="text-[13px] text-white truncate">{f.name}</div> - <div className="text-[11px] text-zinc-500 truncate"> - {status === 'queued' ? 'Queued…' : - status === 'complete' ? `Generated ${j.result?.clips?.length || 0} clip${(j.result?.clips?.length || 0) === 1 ? '' : 's'}` : - status === 'error' ? 
(lastLog || 'Failed') : - (lastLog || 'Processing…')} - </div> + <div className="text-[11px] text-zinc-500 truncate">{caption}</div> + </div> + <div className="text-[10px] font-mono text-zinc-500 shrink-0 text-right tabular-nums"> + {status === 'complete' && <span className="text-success">done</span>} + {status === 'error' && <span className="text-red-400">failed</span>} + {showEta && !overdue && <span>{fmtRemaining(remaining)}</span>} + {overdue && <span>taking longer than expected…</span>} </div> - {j?.jobId && <span className="text-[10px] font-mono text-zinc-600 shrink-0">{j.jobId.slice(0, 8)}</span>} </div> </div> ); @@ -215,22 +326,32 @@ export default function Processing({ wizard }) { </div> <div className="flex items-center justify-between mt-6 pt-4 border-t border-border"> - <span className="text-[11px] text-zinc-500"> - {overallStatus === 'complete' ? 'All files done.' : - overallStatus === 'error' ? 'Missing Gemini key — set it in Settings.' : - 'You can wait, play Snake, or skip to whatever has finished.'} - </span> + {!keys.gemini ? ( + <span className="text-[11px] text-amber-300 flex items-center gap-2"> + <KeyRound size={12} /> + No Gemini key set —{' '} + <Link to="/settings/system/api-keys" className="underline hover:text-white"> + add one to start + </Link>. + </span> + ) : allTerminal ? ( + <span className="text-[11px] text-zinc-500">All files done.</span> + ) : ( + <span className="text-[11px] text-zinc-500"> + You can wait, play Snake, or skip to whatever has finished. 
+ </span> + )} <div className="flex items-center gap-3"> <button onClick={() => wizard.goto(3)} - disabled={!hasAnyComplete} + disabled={!hasAnyComplete && !allTerminal} className="text-[13px] text-zinc-400 hover:text-white transition-colors disabled:opacity-40 disabled:cursor-not-allowed" > Skip → </button> <button onClick={() => wizard.goto(3)} - disabled={overallStatus !== 'complete'} + disabled={!anyTerminal} className="btn-primary px-5 py-2 text-[13px] disabled:opacity-40 disabled:cursor-not-allowed" > Review → @@ -252,8 +373,10 @@ export default function Processing({ wizard }) { } function StatusIcon({ status }) { - if (status === 'complete') return <CheckCircle2 size={16} className="text-success" />; - if (status === 'error') return <XCircle size={16} className="text-red-400" />; - if (status === 'queued') return <span className="w-2 h-2 rounded-full bg-zinc-700 inline-block mx-[3px]" />; + if (status === 'complete') return <CheckCircle2 size={16} className="text-success" />; + if (status === 'error') return <XCircle size={16} className="text-red-400" />; + if (status === 'awaiting_key') return <KeyRound size={14} className="text-amber-400" />; + if (status === 'queued') return <span className="w-2 h-2 rounded-full bg-zinc-700 inline-block mx-[3px]" />; + // uploading / processing / anything else in-flight return <Loader2 size={16} className="text-primary animate-spin" />; } From f5f8e245b6cb439fa303bf70bbf700b10a77979b Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Wed, 20 May 2026 10:57:15 -0400 Subject: [PATCH 31/43] =?UTF-8?q?feat(short-form):=20Phase=202=20=E2=80=94?= =?UTF-8?q?=20/api/colorgrade=20+=20/api/silencecut=20+=20auto-pipeline=20?= =?UTF-8?q?wiring?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds two state-mutating per-clip endpoints and wires them into the short-form auto-pipeline between AI-edit and subtitles, so the four Categorize toggles now all 
act on the output. - backend/app/editing/color_grade.py: 5 LUT presets (teal_orange, warm, cool, vivid, noir) implemented as FFmpeg-native filter chains. Avoids bundling .cube files (no licensing/binary-asset concerns). - backend/app/editing/silence.py: silencedetect-stderr parser, keep-window inversion math, and a cut_silence helper that uses select/aselect with filter_complex. Falls back to stream-copy when total silence < 50ms so the output is always produced. - backend/app/editing/auto_pipeline.py: apply_color_grade + apply_silence_cut helpers, idempotent on existing output files like the Phase 1 siblings. - backend/app/main.py: ColorGradeRequest (with LUT allowlist validator) and SilenceCutRequest (bounded noise_db and min_silence_sec), POST handlers mirroring /api/subtitle's persistence pattern, optional lut_name Form field on /api/process, and the color-grade + silence-cut steps wired into _run_auto_pipeline before subtitles. - 39 new unit tests across LUT presets, the silencedetect parser, keep-window math, cut_silence fallbacks, and the new Pydantic request models. - OpenAPI baseline regenerated (only /api/colorgrade, /api/silencecut, and the lut_name Form field diffed). pytest -m "not e2e" → 150/150 green. `npm run build` → 0 warnings. 
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- backend/app/editing/auto_pipeline.py | 59 ++++++ backend/app/editing/color_grade.py | 54 +++++ backend/app/editing/silence.py | 120 +++++++++++ backend/app/main.py | 200 +++++++++++++++++- backend/tests/snapshots/baseline.openapi.json | 162 ++++++++++++++ .../tests/unit/test_auto_pipeline_config.py | 49 +++++ backend/tests/unit/test_color_grade.py | 89 ++++++++ backend/tests/unit/test_silence.py | 168 +++++++++++++++ 8 files changed, 896 insertions(+), 5 deletions(-) create mode 100644 backend/app/editing/color_grade.py create mode 100644 backend/app/editing/silence.py create mode 100644 backend/tests/unit/test_color_grade.py create mode 100644 backend/tests/unit/test_silence.py diff --git a/backend/app/editing/auto_pipeline.py b/backend/app/editing/auto_pipeline.py index caeee20c..69e32c3e 100644 --- a/backend/app/editing/auto_pipeline.py +++ b/backend/app/editing/auto_pipeline.py @@ -8,6 +8,8 @@ from typing import Any, Dict, Optional from app.editing.ai_filters import VideoEditor +from app.editing.color_grade import DEFAULT_LUT, apply_lut, allowed_luts +from app.editing.silence import cut_silence from app.overlays.subtitles_generate import generate_srt, generate_srt_from_video from app.overlays.subtitles_render import burn_subtitles @@ -79,6 +81,63 @@ def apply_ai_edit( return edited_filename +def apply_color_grade( + *, + job_id: str, + input_filename: str, + lut_name: Optional[str] = None, +) -> str: + """Apply a named LUT preset to a clip. Writes ``graded_{input_filename}``. + + Idempotent: returns the existing output when present and non-empty. + Raises ``ValueError`` for unknown LUTs (caller should log and skip). 
+ """ + job_dir = _job_dir(job_id) + input_path = os.path.join(job_dir, input_filename) + output_filename = f"graded_{input_filename}" + output_path = os.path.join(job_dir, output_filename) + + if os.path.exists(output_path) and os.path.getsize(output_path) > 0: + return output_filename + + name = (lut_name or DEFAULT_LUT).strip().lower() + if name not in allowed_luts(): + raise ValueError(f"lut_name must be one of {sorted(allowed_luts())}, got {name!r}") + + apply_lut(input_path, output_path, name) + return output_filename + + +def apply_silence_cut( + *, + job_id: str, + input_filename: str, + noise_db: float = -30.0, + min_silence_sec: float = 0.5, +) -> str: + """Cut silent segments from a clip. Writes ``silencecut_{input_filename}``. + + Idempotent: returns the existing output when present and non-empty. + A clip with no detectable silence is stream-copied so the output still + exists (matches the legacy per-clip flow's expectations). + """ + job_dir = _job_dir(job_id) + input_path = os.path.join(job_dir, input_filename) + output_filename = f"silencecut_{input_filename}" + output_path = os.path.join(job_dir, output_filename) + + if os.path.exists(output_path) and os.path.getsize(output_path) > 0: + return output_filename + + cut_silence( + input_path, + output_path, + noise_db=noise_db, + min_silence_sec=min_silence_sec, + ) + return output_filename + + def apply_subtitles( *, job_id: str, diff --git a/backend/app/editing/color_grade.py b/backend/app/editing/color_grade.py new file mode 100644 index 00000000..ca945e46 --- /dev/null +++ b/backend/app/editing/color_grade.py @@ -0,0 +1,54 @@ +"""Color-grade presets implemented as FFmpeg filter chains (no external .cube files).""" + +from __future__ import annotations + +import os +from typing import Dict + +from app.video import ffmpeg as ffmpeg_wrapper + + +# Each preset is a single ffmpeg -vf expression. 
Built from filters that ship +# in every modern ffmpeg (curves, colorbalance, eq, hue) so there is no +# dependency on a separate LUT asset that would need licensing review. +LUT_PRESETS: Dict[str, str] = { + "teal_orange": ( + "curves=" + "red='0/0 0.25/0.2 0.5/0.55 0.75/0.85 1/1':" + "green='0/0 0.5/0.5 1/1':" + "blue='0/0.1 0.25/0.35 0.5/0.5 0.75/0.55 1/0.85'" + ), + "warm": "colorbalance=rs=0.15:gs=0.05:bs=-0.15:rm=0.1:gm=0.05:bm=-0.1", + "cool": "colorbalance=rs=-0.1:gs=0.0:bs=0.15:rm=-0.05:bm=0.1", + "vivid": "eq=saturation=1.4:contrast=1.08:brightness=0.02", + "noir": "hue=s=0,eq=contrast=1.25:brightness=-0.03", +} + +DEFAULT_LUT = "teal_orange" + + +def allowed_luts() -> tuple: + return tuple(LUT_PRESETS.keys()) + + +def apply_lut(input_path: str, output_path: str, lut_name: str) -> None: + """Apply a named LUT preset via FFmpeg ``-vf``. Re-encodes video; copies audio. + + Raises ``ValueError`` for unknown LUTs and ``FFmpegError`` for ffmpeg failures. + """ + if lut_name not in LUT_PRESETS: + raise ValueError( + f"Unknown LUT {lut_name!r}. Allowed: {sorted(LUT_PRESETS.keys())}" + ) + + ffmpeg_wrapper.run([ + "-y", + "-i", input_path, + "-vf", LUT_PRESETS[lut_name], + "-c:v", "libx264", "-preset", "fast", "-crf", "20", + "-c:a", "copy", + output_path, + ]) + + if not os.path.exists(output_path) or os.path.getsize(output_path) == 0: + raise RuntimeError(f"Color grade produced empty output: {output_path}") diff --git a/backend/app/editing/silence.py b/backend/app/editing/silence.py new file mode 100644 index 00000000..c16db004 --- /dev/null +++ b/backend/app/editing/silence.py @@ -0,0 +1,120 @@ +"""Silence detection + removal via FFmpeg ``silencedetect`` + ``select``/``aselect``.""" + +from __future__ import annotations + +import os +import re +from typing import List, Tuple + +from app.video import ffmpeg as ffmpeg_wrapper + + +# Parsers expect lines like: +# [silencedetect @ 0x...] silence_start: 1.234 +# [silencedetect @ 0x...] 
silence_end: 5.678 | silence_duration: 4.444 +_RE_SILENCE_START = re.compile(r"silence_start:\s*(-?\d+(?:\.\d+)?)") +_RE_SILENCE_END = re.compile(r"silence_end:\s*(-?\d+(?:\.\d+)?)") + + +def parse_silence_segments(stderr_text: str) -> List[Tuple[float, float]]: + """Pair up ``silence_start`` / ``silence_end`` lines into ``(start, end)`` tuples. + + Tolerates a trailing unmatched ``silence_start`` (silence at end of file) + by dropping it — we cannot tell its end time without re-running detect. + """ + starts = [float(m.group(1)) for m in _RE_SILENCE_START.finditer(stderr_text)] + ends = [float(m.group(1)) for m in _RE_SILENCE_END.finditer(stderr_text)] + return list(zip(starts, ends)) + + +def invert_silences(silences: List[Tuple[float, float]], duration: float) -> List[Tuple[float, float]]: + """Return the complement of ``silences`` on ``[0, duration]`` — the keep windows.""" + keep: List[Tuple[float, float]] = [] + cursor = 0.0 + for s, e in silences: + if s > cursor: + keep.append((cursor, min(s, duration))) + cursor = max(cursor, e) + if cursor < duration: + keep.append((cursor, duration)) + return [(a, b) for a, b in keep if b > a] + + +def detect_silence_segments( + input_path: str, + *, + noise_db: float = -30.0, + min_silence_sec: float = 0.5, +) -> List[Tuple[float, float]]: + """Run ffmpeg ``silencedetect`` and return ``[(start, end), ...]`` in seconds.""" + result = ffmpeg_wrapper.run( + [ + "-i", input_path, + "-af", f"silencedetect=noise={noise_db}dB:d={min_silence_sec}", + "-f", "null", "-", + ], + check=False, + ) + stderr = (result.stderr or b"").decode(errors="replace") + return parse_silence_segments(stderr) + + +def _between_expr(intervals: List[Tuple[float, float]]) -> str: + """Build a ``between(t,a1,b1)+between(t,a2,b2)+...`` expression for select filters.""" + return "+".join(f"between(t,{a:.3f},{b:.3f})" for a, b in intervals) + + +def cut_silence( + input_path: str, + output_path: str, + *, + noise_db: float = -30.0, + min_silence_sec: 
float = 0.5, +) -> dict: + """Cut silent segments out of ``input_path`` and write the result to ``output_path``. + + Returns a small summary dict (``segments_removed``, ``seconds_removed``). + If no silence is detected or the keep-windows would collapse the clip, + falls back to a stream copy so the caller still gets a valid output. + """ + duration = ffmpeg_wrapper.probe_duration(input_path) + silences = detect_silence_segments( + input_path, + noise_db=noise_db, + min_silence_sec=min_silence_sec, + ) + keep = invert_silences(silences, duration) + + total_silence = sum(e - s for s, e in silences) + if not silences or not keep or total_silence < 0.05: + # Nothing meaningful to cut — copy the input through so the file + # exists at output_path for the caller's idempotency check. + ffmpeg_wrapper.run([ + "-y", "-i", input_path, + "-c", "copy", + output_path, + ]) + return {"segments_removed": 0, "seconds_removed": 0.0} + + expr = _between_expr(keep) + filter_complex = ( + f"[0:v]select='{expr}',setpts=N/FRAME_RATE/TB[v];" + f"[0:a]aselect='{expr}',asetpts=N/SR/TB[a]" + ) + ffmpeg_wrapper.run([ + "-y", + "-i", input_path, + "-filter_complex", filter_complex, + "-map", "[v]", "-map", "[a]", + "-c:v", "libx264", "-preset", "fast", "-crf", "20", + "-c:a", "aac", + output_path, + ]) + + if not os.path.exists(output_path) or os.path.getsize(output_path) == 0: + raise RuntimeError(f"Silence cut produced empty output: {output_path}") + + return { + "segments_removed": len(silences), + "seconds_removed": round(total_silence, 3), + } diff --git a/backend/app/main.py b/backend/app/main.py index f25375da..f22a1b8c 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -434,7 +434,12 @@ async def _run_auto_pipeline( return # Lazy import keeps cold-start cheap when no auto-pipeline is requested. 
- from app.editing.auto_pipeline import apply_ai_edit, apply_subtitles + from app.editing.auto_pipeline import ( + apply_ai_edit, + apply_color_grade, + apply_silence_cut, + apply_subtitles, + ) try: with open(metadata_path, 'r') as f: @@ -479,8 +484,38 @@ async def _run_auto_pipeline( except Exception as exc: jobs[job_id]['logs'].append(f"⚠️ auto AI-edit failed for clip {i + 1}: {exc}") - # color_grade — Phase 2 - # silence_removal — Phase 2 + if auto_config.get('color_grade'): + try: + graded = await loop.run_in_executor( + None, + partial( + apply_color_grade, + job_id=job_id, + input_filename=current_filename, + lut_name=auto_config.get('lut_name'), + ), + ) + variants['graded'] = graded + current_filename = graded + jobs[job_id]['logs'].append(f"🎨 auto color-grade applied to clip {i + 1}") + except Exception as exc: + jobs[job_id]['logs'].append(f"⚠️ auto color-grade failed for clip {i + 1}: {exc}") + + if auto_config.get('silence_removal'): + try: + silenced = await loop.run_in_executor( + None, + partial( + apply_silence_cut, + job_id=job_id, + input_filename=current_filename, + ), + ) + variants['silencecut'] = silenced + current_filename = silenced + jobs[job_id]['logs'].append(f"🔇 auto silence-cut applied to clip {i + 1}") + except Exception as exc: + jobs[job_id]['logs'].append(f"⚠️ auto silence-cut failed for clip {i + 1}: {exc}") if auto_config.get('auto_subtitles') and transcript: try: @@ -556,8 +591,9 @@ async def process_endpoint( category: Optional[str] = Form(None), auto_subtitles: Optional[str] = Form(None), auto_edit: Optional[str] = Form(None), - color_grade: Optional[str] = Form(None), # honored in Phase 2 - silence_removal: Optional[str] = Form(None), # honored in Phase 2 + color_grade: Optional[str] = Form(None), + lut_name: Optional[str] = Form(None), # Phase 2: which LUT preset; defaults to teal_orange + silence_removal: Optional[str] = Form(None), subtitle_style: Optional[str] = Form(None), # JSON string from useBrandKit() ): api_key = 
request.headers.get("X-Gemini-Key") @@ -604,11 +640,20 @@ async def process_endpoint( status_code=400, detail=f"Invalid category {cat_raw!r}. Allowed: {sorted(_AUTO_ALLOWED_CATEGORIES)}", ) + lut_raw = (lut_name or "").strip().lower() or None + if lut_raw is not None: + from app.editing.color_grade import allowed_luts as _luts + if lut_raw not in _luts(): + raise HTTPException( + status_code=400, + detail=f"Invalid lut_name {lut_raw!r}. Allowed: {sorted(_luts())}", + ) auto_config = { "category": cat_raw, "auto_edit": _normalize_bool_form(auto_edit), "auto_subtitles": _normalize_bool_form(auto_subtitles), "color_grade": _normalize_bool_form(color_grade), + "lut_name": lut_raw, "silence_removal": _normalize_bool_form(silence_removal), "subtitle_style": _parse_subtitle_style(subtitle_style), } @@ -1144,6 +1189,151 @@ def run_burn(): "new_video_url": f"/videos/{req.job_id}/{output_filename}" } + +# --------------------------------------------------------------------------- +# Phase 2: per-clip color grade + silence cut. State-mutating per-clip ops +# that mirror /api/edit and /api/subtitle. Auth/rate-limit/audit/abuse-cap are +# deferred to /gsd-secure-phase along with the rest of the existing routes +# (see HANDOFF.md §5 + Decision D3 in the plan handover). 
+# --------------------------------------------------------------------------- + + +class ColorGradeRequest(BaseModel): + job_id: str + clip_index: int + lut_name: str + input_filename: Optional[str] = None + + @field_validator("lut_name", mode="before") + @classmethod + def _normalize_lut(cls, v: Any) -> str: + from app.editing.color_grade import allowed_luts as _luts + s = str(v or "").strip().lower() + if s not in _luts(): + raise ValueError(f"lut_name must be one of {sorted(_luts())}") + return s + + +class SilenceCutRequest(BaseModel): + job_id: str + clip_index: int + input_filename: Optional[str] = None + noise_db: float = Field(default=-30.0, ge=-80.0, le=0.0) + min_silence_sec: float = Field(default=0.5, ge=0.05, le=10.0) + + +def _resolve_clip_input(job_id: str, clip_index: int, input_filename: Optional[str]) -> tuple: + """Shared input-path resolver for per-clip route handlers. + + Returns ``(job, output_dir, input_path, filename)``. Raises ``HTTPException`` + with the same codes the other per-clip routes use, so frontend handling + stays uniform. 
+ """ + if job_id not in jobs: + raise HTTPException(status_code=404, detail="Job not found") + job = jobs[job_id] + if 'result' not in job or 'clips' not in job['result']: + raise HTTPException(status_code=400, detail="Job result not available") + if clip_index < 0 or clip_index >= len(job['result']['clips']): + raise HTTPException(status_code=404, detail="Clip not found") + + output_dir = os.path.join(OUTPUT_DIR, job_id) + if input_filename: + filename = os.path.basename(input_filename) + else: + filename = job['result']['clips'][clip_index]['video_url'].split('/')[-1] + input_path = os.path.join(output_dir, filename) + if not os.path.exists(input_path): + raise HTTPException(status_code=404, detail=f"Video file not found: {input_path}") + return job, output_dir, input_path, filename + + +def _persist_clip_url(job_id: str, clip_index: int, new_filename: str) -> None: + """Write the new clip URL back to in-memory jobs[] and to metadata.json.""" + new_url = f"/videos/{job_id}/{new_filename}" + job = jobs.get(job_id) + if job and clip_index < len(job['result']['clips']): + job['result']['clips'][clip_index]['video_url'] = new_url + + try: + json_files = glob.glob(os.path.join(OUTPUT_DIR, job_id, "*_metadata.json")) + if json_files: + with open(json_files[0], 'r') as f: + data = json.load(f) + clips = data.get('shorts', []) + if clip_index < len(clips): + clips[clip_index]['video_url'] = new_url + data['shorts'] = clips + with open(json_files[0], 'w') as f: + json.dump(data, f, indent=4) + except Exception as exc: + print(f"⚠️ Failed to update metadata.json for clip url: {exc}") + + +@app.post("/api/colorgrade") +async def color_grade_clip(req: ColorGradeRequest): + """Apply a named LUT preset to a clip. 
Writes ``graded_{input}.mp4``.""" + from app.editing.auto_pipeline import apply_color_grade + + _job, _output_dir, _input_path, filename = _resolve_clip_input( + req.job_id, req.clip_index, req.input_filename + ) + + loop = asyncio.get_event_loop() + try: + out_filename = await loop.run_in_executor( + None, + partial( + apply_color_grade, + job_id=req.job_id, + input_filename=filename, + lut_name=req.lut_name, + ), + ) + except Exception as exc: + print(f"❌ ColorGrade Error: {exc}") + raise HTTPException(status_code=500, detail=str(exc)) + + _persist_clip_url(req.job_id, req.clip_index, out_filename) + return { + "success": True, + "lut_name": req.lut_name, + "new_video_url": f"/videos/{req.job_id}/{out_filename}", + } + + +@app.post("/api/silencecut") +async def silence_cut_clip(req: SilenceCutRequest): + """Cut silent segments out of a clip. Writes ``silencecut_{input}.mp4``.""" + from app.editing.auto_pipeline import apply_silence_cut + + _job, _output_dir, _input_path, filename = _resolve_clip_input( + req.job_id, req.clip_index, req.input_filename + ) + + loop = asyncio.get_event_loop() + try: + out_filename = await loop.run_in_executor( + None, + partial( + apply_silence_cut, + job_id=req.job_id, + input_filename=filename, + noise_db=req.noise_db, + min_silence_sec=req.min_silence_sec, + ), + ) + except Exception as exc: + print(f"❌ SilenceCut Error: {exc}") + raise HTTPException(status_code=500, detail=str(exc)) + + _persist_clip_url(req.job_id, req.clip_index, out_filename) + return { + "success": True, + "new_video_url": f"/videos/{req.job_id}/{out_filename}", + } + + class HookRequest(BaseModel): job_id: str clip_index: int diff --git a/backend/tests/snapshots/baseline.openapi.json b/backend/tests/snapshots/baseline.openapi.json index 9f7fcd85..38bdb744 100644 --- a/backend/tests/snapshots/baseline.openapi.json +++ b/backend/tests/snapshots/baseline.openapi.json @@ -70,6 +70,17 @@ ], "title": "File" }, + "lut_name": { + "anyOf": [ + { + "type": "string" + 
}, + { + "type": "null" + } + ], + "title": "Lut Name" + }, "silence_removal": { "anyOf": [ { @@ -294,6 +305,40 @@ "title": "Body_upload_font_api_fonts_upload_post", "type": "object" }, + "ColorGradeRequest": { + "properties": { + "clip_index": { + "title": "Clip Index", + "type": "integer" + }, + "input_filename": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Input Filename" + }, + "job_id": { + "title": "Job Id", + "type": "string" + }, + "lut_name": { + "title": "Lut Name", + "type": "string" + } + }, + "required": [ + "job_id", + "clip_index", + "lut_name" + ], + "title": "ColorGradeRequest", + "type": "object" + }, "EditRequest": { "properties": { "api_key": { @@ -650,6 +695,49 @@ "title": "SaaSPostRequest", "type": "object" }, + "SilenceCutRequest": { + "properties": { + "clip_index": { + "title": "Clip Index", + "type": "integer" + }, + "input_filename": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Input Filename" + }, + "job_id": { + "title": "Job Id", + "type": "string" + }, + "min_silence_sec": { + "default": 0.5, + "maximum": 10.0, + "minimum": 0.05, + "title": "Min Silence Sec", + "type": "number" + }, + "noise_db": { + "default": -30.0, + "maximum": 0.0, + "minimum": -80.0, + "title": "Noise Db", + "type": "number" + } + }, + "required": [ + "job_id", + "clip_index" + ], + "title": "SilenceCutRequest", + "type": "object" + }, "SocialPostRequest": { "properties": { "api_key": { @@ -1019,6 +1107,43 @@ "summary": "Get Clip Transcript" } }, + "/api/colorgrade": { + "post": { + "description": "Apply a named LUT preset to a clip. 
Writes ``graded_{input}.mp4``.", + "operationId": "color_grade_clip_api_colorgrade_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ColorGradeRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Color Grade Clip" + } + }, "/api/config": { "get": { "operationId": "get_config_api_config_get", @@ -1794,6 +1919,43 @@ "summary": "Saasshorts Voices" } }, + "/api/silencecut": { + "post": { + "description": "Cut silent segments out of a clip. Writes ``silencecut_{input}.mp4``.", + "operationId": "silence_cut_clip_api_silencecut_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SilenceCutRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Silence Cut Clip" + } + }, "/api/social/post": { "post": { "operationId": "post_to_socials_api_social_post_post", diff --git a/backend/tests/unit/test_auto_pipeline_config.py b/backend/tests/unit/test_auto_pipeline_config.py index fea3205e..984c0769 100644 --- a/backend/tests/unit/test_auto_pipeline_config.py +++ b/backend/tests/unit/test_auto_pipeline_config.py @@ -13,6 +13,8 @@ from pydantic import ValidationError from app.main import ( + ColorGradeRequest, + SilenceCutRequest, SubtitleStyle, _AUTO_ALLOWED_CATEGORIES, _normalize_bool_form, @@ -181,3 +183,50 @@ def 
test_parse_subtitle_style_rejects_out_of_bounds(): def test_categories_allowlist_matches_frontend_contract(): # Matches the four cards in frontend/src/pages/ShortForm/steps/Categorize.jsx. assert _AUTO_ALLOWED_CATEGORIES == {"educational", "yap", "live", "viral"} + + +# --- Phase 2: ColorGradeRequest -------------------------------------------- + +def test_color_grade_request_normalizes_lut_case(): + r = ColorGradeRequest(job_id="j", clip_index=0, lut_name=" Teal_Orange ") + assert r.lut_name == "teal_orange" + + +@pytest.mark.parametrize("name", ["cool", "noir", "teal_orange", "vivid", "warm"]) +def test_color_grade_request_accepts_every_preset(name): + r = ColorGradeRequest(job_id="j", clip_index=0, lut_name=name) + assert r.lut_name == name + + +def test_color_grade_request_rejects_unknown_lut(): + with pytest.raises(ValidationError): + ColorGradeRequest(job_id="j", clip_index=0, lut_name="neon_dreams") + + +def test_color_grade_request_rejects_empty_lut(): + with pytest.raises(ValidationError): + ColorGradeRequest(job_id="j", clip_index=0, lut_name="") + + +# --- Phase 2: SilenceCutRequest -------------------------------------------- + +def test_silence_cut_request_defaults(): + r = SilenceCutRequest(job_id="j", clip_index=0) + assert r.noise_db == -30.0 + assert r.min_silence_sec == 0.5 + assert r.input_filename is None + + +def test_silence_cut_request_rejects_positive_noise_db(): + with pytest.raises(ValidationError): + SilenceCutRequest(job_id="j", clip_index=0, noise_db=5.0) + + +def test_silence_cut_request_rejects_overlong_silence_threshold(): + with pytest.raises(ValidationError): + SilenceCutRequest(job_id="j", clip_index=0, min_silence_sec=999.0) + + +def test_silence_cut_request_rejects_too_short_silence_threshold(): + with pytest.raises(ValidationError): + SilenceCutRequest(job_id="j", clip_index=0, min_silence_sec=0.001) diff --git a/backend/tests/unit/test_color_grade.py b/backend/tests/unit/test_color_grade.py new file mode 100644 index 
00000000..e5d3a1c3 --- /dev/null +++ b/backend/tests/unit/test_color_grade.py @@ -0,0 +1,89 @@ +"""Tests for the Phase 2 color-grade preset module. + +Covers the LUT allowlist, the FFmpeg filter chains we ship, and the +``apply_lut`` wrapper's contract (rejects unknown LUTs without calling +ffmpeg). +""" +from __future__ import annotations + +from unittest.mock import patch + +import pytest + +from app.editing.color_grade import ( + DEFAULT_LUT, + LUT_PRESETS, + allowed_luts, + apply_lut, +) + + +def test_default_lut_is_in_presets(): + assert DEFAULT_LUT in LUT_PRESETS + + +def test_allowed_luts_matches_presets_keys(): + assert sorted(allowed_luts()) == sorted(LUT_PRESETS.keys()) + + +def test_presets_cover_the_expected_set(): + assert sorted(LUT_PRESETS.keys()) == ["cool", "noir", "teal_orange", "vivid", "warm"] + + +@pytest.mark.parametrize("name", sorted(LUT_PRESETS.keys())) +def test_every_preset_is_a_non_empty_string(name): + expr = LUT_PRESETS[name] + assert isinstance(expr, str) + assert expr.strip() != "" + + +def test_teal_orange_uses_curves_filter(): + assert LUT_PRESETS["teal_orange"].startswith("curves=") + + +def test_noir_drops_saturation(): + assert "s=0" in LUT_PRESETS["noir"] + + +def test_apply_lut_rejects_unknown_name(tmp_path): + src = tmp_path / "in.mp4" + src.write_bytes(b"fake") + dst = tmp_path / "out.mp4" + + with patch("app.editing.color_grade.ffmpeg_wrapper.run") as run_mock: + with pytest.raises(ValueError) as exc: + apply_lut(str(src), str(dst), "neon_dreams") + assert "Unknown LUT" in str(exc.value) + run_mock.assert_not_called() + + +def test_apply_lut_invokes_ffmpeg_with_expected_args(tmp_path): + src = tmp_path / "in.mp4" + src.write_bytes(b"fake") + dst = tmp_path / "out.mp4" + + def fake_run(args, **_kwargs): + # Mimic a successful ffmpeg run by creating the output file. 
+ dst.write_bytes(b"graded") + return None + + with patch("app.editing.color_grade.ffmpeg_wrapper.run", side_effect=fake_run) as run_mock: + apply_lut(str(src), str(dst), "warm") + + args = run_mock.call_args.args[0] + assert "-vf" in args + assert args[args.index("-vf") + 1] == LUT_PRESETS["warm"] + assert args[-1] == str(dst) + + +def test_apply_lut_raises_when_ffmpeg_produces_empty_output(tmp_path): + src = tmp_path / "in.mp4" + src.write_bytes(b"fake") + dst = tmp_path / "out.mp4" + + # Simulate ffmpeg "succeeding" but writing nothing. + with patch("app.editing.color_grade.ffmpeg_wrapper.run") as run_mock: + run_mock.return_value = None + with pytest.raises(RuntimeError) as exc: + apply_lut(str(src), str(dst), "vivid") + assert "empty output" in str(exc.value) diff --git a/backend/tests/unit/test_silence.py b/backend/tests/unit/test_silence.py new file mode 100644 index 00000000..eb6ef467 --- /dev/null +++ b/backend/tests/unit/test_silence.py @@ -0,0 +1,168 @@ +"""Tests for the Phase 2 silence-detection helpers. + +Cover the silencedetect stderr parser, the keep-window inversion math, +and ``cut_silence``'s fallback behavior when nothing is silent. 
+""" +from __future__ import annotations + +from unittest.mock import patch + +import pytest + +from app.editing.silence import ( + cut_silence, + invert_silences, + parse_silence_segments, +) + + +# --- parse_silence_segments ------------------------------------------------ + +def test_parse_returns_empty_when_no_silence_lines(): + assert parse_silence_segments("just some ffmpeg log output") == [] + + +def test_parse_pairs_start_and_end(): + text = ( + "[silencedetect @ 0x55] silence_start: 1.234\n" + "[silencedetect @ 0x55] silence_end: 2.345 | silence_duration: 1.111\n" + ) + assert parse_silence_segments(text) == [(1.234, 2.345)] + + +def test_parse_multiple_segments(): + text = ( + "silence_start: 0.5\nsilence_end: 1.2 | silence_duration: 0.7\n" + "silence_start: 3.4\nsilence_end: 4.8 | silence_duration: 1.4\n" + "silence_start: 7.0\nsilence_end: 9.5 | silence_duration: 2.5\n" + ) + assert parse_silence_segments(text) == [ + (0.5, 1.2), + (3.4, 4.8), + (7.0, 9.5), + ] + + +def test_parse_drops_unmatched_trailing_start(): + text = ( + "silence_start: 1.0\nsilence_end: 2.0\n" + "silence_start: 5.0\n" # no matching end → file-end silence + ) + # zip() truncates to the shorter list, so the trailing start is dropped. 
+ assert parse_silence_segments(text) == [(1.0, 2.0)] + + +# --- invert_silences ------------------------------------------------------- + +def test_invert_no_silence_keeps_whole_clip(): + assert invert_silences([], 10.0) == [(0.0, 10.0)] + + +def test_invert_silence_at_start(): + keep = invert_silences([(0.0, 2.0)], 10.0) + assert keep == [(2.0, 10.0)] + + +def test_invert_silence_at_end(): + keep = invert_silences([(8.0, 10.0)], 10.0) + assert keep == [(0.0, 8.0)] + + +def test_invert_middle_silence_splits_into_two(): + keep = invert_silences([(3.0, 5.0)], 10.0) + assert keep == [(0.0, 3.0), (5.0, 10.0)] + + +def test_invert_three_silences_yields_correct_keep_windows(): + keep = invert_silences([(0.0, 1.0), (3.0, 4.0), (8.0, 9.0)], 10.0) + assert keep == [(1.0, 3.0), (4.0, 8.0), (9.0, 10.0)] + + +def test_invert_overlapping_silences_collapse(): + keep = invert_silences([(1.0, 3.0), (2.5, 4.0)], 10.0) + # second silence starts before first ends — cursor jumps to 4.0 + assert keep == [(0.0, 1.0), (4.0, 10.0)] + + +def test_invert_silence_spanning_full_clip_returns_empty(): + assert invert_silences([(0.0, 10.0)], 10.0) == [] + + +# --- cut_silence happy paths / fallbacks ----------------------------------- + +def test_cut_silence_falls_back_to_copy_when_no_silence(tmp_path): + src = tmp_path / "in.mp4" + src.write_bytes(b"fake") + dst = tmp_path / "out.mp4" + + def fake_run(args, **_kwargs): + # Copy-only call must use ``-c copy``. 
+ assert "-c" in args and args[args.index("-c") + 1] == "copy" + dst.write_bytes(b"copied") + return None + + with patch("app.editing.silence.ffmpeg_wrapper.probe_duration", return_value=10.0), \ + patch("app.editing.silence.ffmpeg_wrapper.run", side_effect=fake_run), \ + patch("app.editing.silence.detect_silence_segments", return_value=[]): + result = cut_silence(str(src), str(dst)) + + assert result == {"segments_removed": 0, "seconds_removed": 0.0} + + +def test_cut_silence_bails_out_when_total_silence_is_negligible(tmp_path): + src = tmp_path / "in.mp4" + src.write_bytes(b"fake") + dst = tmp_path / "out.mp4" + + def fake_run(args, **_kwargs): + # Bail-out path must still copy, not run a filter graph. + assert "-filter_complex" not in args + dst.write_bytes(b"copied") + return None + + with patch("app.editing.silence.ffmpeg_wrapper.probe_duration", return_value=10.0), \ + patch("app.editing.silence.ffmpeg_wrapper.run", side_effect=fake_run), \ + patch("app.editing.silence.detect_silence_segments", return_value=[(1.0, 1.02)]): + result = cut_silence(str(src), str(dst)) + + assert result["segments_removed"] == 0 + + +def test_cut_silence_invokes_filter_complex_when_silence_present(tmp_path): + src = tmp_path / "in.mp4" + src.write_bytes(b"fake") + dst = tmp_path / "out.mp4" + + captured = {} + + def fake_run(args, **_kwargs): + captured["args"] = args + dst.write_bytes(b"cut") + return None + + with patch("app.editing.silence.ffmpeg_wrapper.probe_duration", return_value=10.0), \ + patch("app.editing.silence.ffmpeg_wrapper.run", side_effect=fake_run), \ + patch("app.editing.silence.detect_silence_segments", return_value=[(3.0, 5.0)]): + result = cut_silence(str(src), str(dst)) + + assert result["segments_removed"] == 1 + assert result["seconds_removed"] == 2.0 + assert "-filter_complex" in captured["args"] + filter_expr = captured["args"][captured["args"].index("-filter_complex") + 1] + assert "select=" in filter_expr + assert "aselect=" in filter_expr + assert 
"between(t,0.000,3.000)" in filter_expr + assert "between(t,5.000,10.000)" in filter_expr + + +def test_cut_silence_raises_when_ffmpeg_output_missing(tmp_path): + src = tmp_path / "in.mp4" + src.write_bytes(b"fake") + dst = tmp_path / "out.mp4" # never written by fake_run + + with patch("app.editing.silence.ffmpeg_wrapper.probe_duration", return_value=10.0), \ + patch("app.editing.silence.ffmpeg_wrapper.run", return_value=None), \ + patch("app.editing.silence.detect_silence_segments", return_value=[(3.0, 5.0)]): + with pytest.raises(RuntimeError) as exc: + cut_silence(str(src), str(dst)) + assert "empty output" in str(exc.value) From b99a87ee221f2b2d79a479ae90de95bff893f7e3 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Wed, 20 May 2026 13:48:19 -0400 Subject: [PATCH 32/43] fix(short-form): re-transcribe silence-cut clips + refresh stale Phase-2 copy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After Phase 2 wired silence_cut into the auto-pipeline, the cached transcript no longer matches the audio when subtitles run after silence cut — words drift forward of where they're spoken. The dubbed-clip path already handles this by re-transcribing from the actual audio; widen the same check to also catch ``silencecut_`` filenames in both ``auto_pipeline.apply_subtitles`` and the /api/subtitle route handler. Also clear three now-stale "lands in Phase 2" references that confused users on the Categorize step and in the run_job docstrings: - frontend/src/pages/ShortForm/steps/Categorize.jsx: hint copy for the color grade + silence removal toggles no longer claims they're unimplemented. - backend/app/main.py: ``_run_auto_pipeline`` header comment + docstring drop the "(Phase 2)" annotations, and the ``variants`` dict no longer flags graded/silencecut as future work. 
Smoke-tested in the browser end-to-end with all 4 toggles on: a 3-clip job produced ``subtitled_silencecut_graded_edited_*.mp4`` polished URLs, the teal-orange LUT is visibly applied, and pytest stays at 150/150. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- backend/app/editing/auto_pipeline.py | 11 ++++++---- backend/app/main.py | 22 ++++++++++--------- .../src/pages/ShortForm/steps/Categorize.jsx | 4 ++-- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/backend/app/editing/auto_pipeline.py b/backend/app/editing/auto_pipeline.py index 69e32c3e..7bfbf3f1 100644 --- a/backend/app/editing/auto_pipeline.py +++ b/backend/app/editing/auto_pipeline.py @@ -173,10 +173,13 @@ def apply_subtitles( max_words = None text_case = style.get('text_case') or 'original' - # Mirror /api/subtitle: dubbed clips need fresh transcription because - # the original transcript no longer matches the audio track. - is_dubbed = input_filename.startswith("translated_") - if is_dubbed: + # Dubbed clips and silence-cut clips both invalidate the original + # transcript timings, so we re-transcribe from the actual audio track. 
+ needs_retranscribe = ( + input_filename.startswith("translated_") + or input_filename.startswith("silencecut_") + ) + if needs_retranscribe: success = generate_srt_from_video(input_path, srt_path, max_words=max_words, text_case=text_case) else: success = generate_srt(transcript, clip_start, clip_end, srt_path, max_words=max_words, text_case=text_case) diff --git a/backend/app/main.py b/backend/app/main.py index f22a1b8c..0e16e651 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -189,14 +189,14 @@ async def lifespan(app: FastAPI): app.mount("/thumbnails", StaticFiles(directory=THUMBNAILS_DIR), name="thumbnails") # --------------------------------------------------------------------------- -# Short-form auto-pipeline config (Phase 1 — see ~/.claude/plans/...-cray.md) +# Short-form auto-pipeline config (Phase 1+2 — see ~/.claude/plans/...-cray.md) # # When POST /api/process arrives with auto_subtitles / auto_edit / color_grade # / silence_removal toggles, run_job dispatches the post-processing chain # AFTER the CLI subprocess produces the raw reframed clips. Each enabled step # writes a sibling file (originals are preserved) and the polished URL is -# what the wizard surfaces in Review. The full chain (Phase 2 wires the last -# two) is: AI edit → color grade → silence removal → subtitles. +# what the wizard surfaces in Review. The chain is: +# AI edit → color grade → silence removal → subtitles. # --------------------------------------------------------------------------- _AUTO_ALLOWED_CATEGORIES = {"educational", "yap", "live", "viral"} @@ -421,9 +421,9 @@ async def _run_auto_pipeline( ) -> None: """Apply the per-clip post-processing chain configured in /api/process. - Order: AI edit → color grade (Phase 2) → silence removal (Phase 2) → subtitles. - Each step writes a sibling file; the original ``_clip_N.mp4`` is preserved - so Review's per-clip toggles (Phase 3) can swap URLs without re-rendering. 
+ Order: AI edit → color grade → silence removal → subtitles. Each step + writes a sibling file; the original ``_clip_N.mp4`` is preserved so + Review's per-clip toggles (Phase 3) can swap URLs without re-rendering. Per-clip failures append to ``jobs[job_id]['logs']`` and do NOT mark the job as failed — a polished clip missing one effect is better than nothing. @@ -461,8 +461,8 @@ async def _run_auto_pipeline( variants: Dict[str, Optional[str]] = { 'original': original_filename, 'edited': None, - 'graded': None, # Phase 2 - 'silencecut': None, # Phase 2 + 'graded': None, + 'silencecut': None, 'subtitled': None, } @@ -1127,8 +1127,10 @@ async def add_subtitles(req: SubtitleRequest): try: # 1. Generate SRT - # Check if this is a dubbed video - if so, transcribe it fresh - is_dubbed = filename.startswith("translated_") + # Re-transcribe whenever the audio track no longer matches the cached + # transcript: dubbed clips (translated_) and silence-cut clips both + # invalidate the original word-level timings. + is_dubbed = filename.startswith("translated_") or filename.startswith("silencecut_") # Brand-kit words-per-line: if provided, cap line length to N words. max_words = req.words_per_line if (req.words_per_line and req.words_per_line > 0) else None diff --git a/frontend/src/pages/ShortForm/steps/Categorize.jsx b/frontend/src/pages/ShortForm/steps/Categorize.jsx index 94804ff5..b252f870 100644 --- a/frontend/src/pages/ShortForm/steps/Categorize.jsx +++ b/frontend/src/pages/ShortForm/steps/Categorize.jsx @@ -39,8 +39,8 @@ const CATEGORIES = [ // Order matches the run_job auto-pipeline chain: AI edit → grade → silence → subtitles. const TOGGLES = [ { id: 'autoEdit', label: 'AI edits (zoom / pan / cuts)', hint: 'Gemini picks zoompan + cut points per clip.' }, - { id: 'colorGrade', label: 'Color grade', hint: 'Apply a cinematic LUT (lands with /api/colorgrade in Phase 2).' 
}, - { id: 'silenceRemoval', label: 'Silence removal', hint: 'Auto-cut dead air (lands with /api/silencecut in Phase 2).' }, + { id: 'colorGrade', label: 'Color grade', hint: 'Apply a cinematic LUT (teal-orange by default).' }, + { id: 'silenceRemoval', label: 'Silence removal', hint: 'Auto-cut dead air from your clip.' }, { id: 'autoSubtitles', label: 'Auto subtitles', hint: 'Transcribe + burn captions with brand-kit style.' }, ]; From f21736fd705ef476426071b2f63fc7dfae230aae Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Wed, 20 May 2026 13:55:52 -0400 Subject: [PATCH 33/43] chore: ignore session-local artifacts (.session/ + .compact-ultra symlink) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reorganized the per-session scratch space — handovers now live in .session/compact-ultra/ and test screenshots in .session/screenshots/. A .compact-ultra → .session/compact-ultra symlink keeps the compact-ultra skill's hard-coded save path working transparently. Both .session/ and the .compact-ultra symlink are gitignored so they don't pollute git status across sessions. 
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index c7c6eeaf..e49f13f0 100644 --- a/.gitignore +++ b/.gitignore @@ -50,3 +50,7 @@ backend/tests/fixtures/smoke.mp4 skills/ skills-lock.json +# Session-local artifacts (handovers, test screenshots) — not for git +.session/ +.compact-ultra + From 6a38cf146ac99c0d5987bfe2564213563c921f57 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Wed, 20 May 2026 14:16:48 -0400 Subject: [PATCH 34/43] =?UTF-8?q?feat(short-form):=20Phase=203=20=E2=80=94?= =?UTF-8?q?=20per-clip=20stage=20selector=20+=20LUT=20picker=20in=20Review?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a 5-button segmented control (Original | + Edit | + Grade | + Cut | + Subs) beneath the phone preview that swaps the displayed variant URL on click. The auto-pipeline already emits a chain of variants per clip (original → edited → graded → silencecut → subtitled); this lets the user step through them. Missing variants get a [+] icon. Clicking [+] calls the matching endpoint (/api/edit, /api/colorgrade, /api/silencecut, /api/subtitle) with the most recent existing variant as input_filename, then merges the returned filename into wizard.data and switches the displayed stage to it. A LUT picker dropdown (cool / noir / teal_orange / vivid / warm) appears under the segmented control when the graded stage is selected. Changing the LUT triggers /api/colorgrade with the chosen lut_name; the dropdown is disabled while the request is in flight. Stage selection + chosen LUT persist per-clip in wizard.data.clipStages / clipLuts so reloads keep the user's choices (modulo the existing rehydrate-bounce when the source File handle is lost — pre-existing wizard behavior, unchanged). 
Smoke-tested in browser: stage swap updates Download URL, LUT change to noir re-grades the on-disk file (mtime confirmed), /api/silencecut returns the expected response shape. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- frontend/src/pages/ShortForm/steps/Review.jsx | 309 +++++++++++++++++- 1 file changed, 305 insertions(+), 4 deletions(-) diff --git a/frontend/src/pages/ShortForm/steps/Review.jsx b/frontend/src/pages/ShortForm/steps/Review.jsx index 21392da5..4071210e 100644 --- a/frontend/src/pages/ShortForm/steps/Review.jsx +++ b/frontend/src/pages/ShortForm/steps/Review.jsx @@ -1,7 +1,13 @@ // Step 4: Review. Split view — clip list (left) + phone preview + export bar. // +// Phase 3: per-clip stage selector. The auto-pipeline emits a chain of variants +// (original → edited → graded → silencecut → subtitled) per clip; here we let +// the user step through them. Missing variants can be generated inline via the +// existing /api/edit, /api/colorgrade, /api/silencecut, /api/subtitle endpoints. +// Selection + LUT choice persist in wizard.data so reloads keep them. +// // Export wiring: -// - Download: opens the generated clip URL (existing /api/status results). +// - Download: opens the currently-displayed variant URL. // - Publish: pushes a notification + would call POST /api/social/post. // Backend doesn't queue these yet (plan TODO #9), so we // surface the intent locally via the bell. @@ -9,14 +15,34 @@ // - Send to CapCut: placeholder — backend integration TODO. 
import { useEffect, useMemo, useState } from 'react'; -import { Download, Eye, Scissors } from 'lucide-react'; +import { Download, Eye, Loader2, Plus, Scissors } from 'lucide-react'; import PhoneFrame from '../../../components/ui/PhoneFrame.jsx'; import PlatformBadge from '../../../components/ui/PlatformBadge.jsx'; import { getApiUrl } from '../../../config'; import { pushNotification } from '../../../state/notificationsStore.js'; +import { useKeys } from '../../../state/keysStore.js'; +import { useBrandKit } from '../../../lib/brandKit.js'; const PLATFORMS = ['youtube', 'tiktok', 'instagram', 'snapchat', 'facebook']; +// Chain order must match backend/app/main.py:_run_auto_pipeline. +const STAGES = [ + { key: 'original', label: 'Original', short: 'Original' }, + { key: 'edited', label: '+ AI Edit', short: '+ Edit' }, + { key: 'graded', label: '+ Color Grade', short: '+ Grade' }, + { key: 'silencecut', label: '+ Silence Cut', short: '+ Cut' }, + { key: 'subtitled', label: '+ Subtitles', short: '+ Subs' }, +]; + +// Must match backend/app/editing/color_grade.py:LUT_PRESETS (allowlist enforced +// server-side via the ColorGradeRequest validator). +const LUTS = ['teal_orange', 'warm', 'cool', 'vivid', 'noir']; +const DEFAULT_LUT = 'teal_orange'; + +function clipKey(c) { + return `${c.jobId}-${c.clipIndex}`; +} + function flattenClips(jobs, files) { const out = []; for (const f of files) { @@ -36,6 +62,27 @@ function flattenClips(jobs, files) { return out; } +// Pick the deepest stage whose variant exists, walking the chain backwards. +// Used as the initial display when the wizard first lands on Review. +function pickInitialStage(variants) { + if (!variants) return 'original'; + for (let i = STAGES.length - 1; i >= 0; i--) { + if (variants[STAGES[i].key]) return STAGES[i].key; + } + return 'original'; +} + +// Walk backwards from `targetStage` to find the most recent existing variant — +// that's the input to feed when generating the missing one. 
Falls back to original. +function priorVariantFilename(variants, targetStage) { + const idx = STAGES.findIndex((s) => s.key === targetStage); + for (let i = idx - 1; i >= 0; i--) { + const f = variants?.[STAGES[i].key]; + if (f) return f; + } + return variants?.original || null; +} + export default function Review({ wizard }) { const files = wizard.data.files || []; const jobs = wizard.data.jobs || {}; @@ -44,8 +91,33 @@ export default function Review({ wizard }) { const [showOriginal, setShowOriginal] = useState(false); const [sourceUrl, setSourceUrl] = useState(null); + // Per-clip transient state — loading flag + last error. Lost on reload (OK). + const [pendingStage, setPendingStage] = useState(null); // { clipKey, stageKey } + const [stageError, setStageError] = useState(null); // { clipKey, message } + + const keys = useKeys(); + const brand = useBrandKit(); + const current = clips[Math.min(selected, clips.length - 1)] || null; - const clipUrl = current?.clip?.video_url ? getApiUrl(current.clip.video_url) : null; + + // Persisted per-clip state (auto-saves through wizard.data). + const clipStages = wizard.data.clipStages || {}; + const clipLuts = wizard.data.clipLuts || {}; + + const variants = current?.clip?.variants || null; + const currentClipKey = current ? clipKey(current) : null; + const selectedStage = currentClipKey + ? (clipStages[currentClipKey] || pickInitialStage(variants)) + : 'original'; + const lutName = currentClipKey ? (clipLuts[currentClipKey] || DEFAULT_LUT) : DEFAULT_LUT; + + // Resolve the URL for the currently-selected stage, falling back to the + // deepest existing variant, then to the polished URL the backend set, then + // to the raw clip URL (covers legacy clips without a variants dict). + const stageFilename = variants?.[selectedStage] || variants?.polished || null; + const clipUrl = stageFilename + ? getApiUrl(`/videos/${current.jobId}/${stageFilename}`) + : (current?.clip?.video_url ? 
getApiUrl(current.clip.video_url) : null); // Build a blob URL for the original source file — only available when // the wizard has the in-memory File (lost after reload). @@ -68,6 +140,166 @@ export default function Review({ wizard }) { ); } + function setClipStage(stageKey) { + if (!currentClipKey) return; + wizard.setData({ clipStages: { ...clipStages, [currentClipKey]: stageKey } }); + } + + function setClipLut(lut) { + if (!currentClipKey) return; + wizard.setData({ clipLuts: { ...clipLuts, [currentClipKey]: lut } }); + } + + // Merge a new variant into wizard.data.jobs[fileId].result.clips[i].variants. + // This is the central state update — every successful generation flows through here. + function mergeVariant(stageKey, newFilename) { + if (!current) return; + wizard.setData((prev) => { + const job = prev.jobs?.[current.fileId]; + if (!job?.result?.clips) return prev; + const newClips = job.result.clips.map((c, i) => { + if (i !== current.clipIndex) return c; + const newVariants = { ...(c.variants || { original: c.video_url?.split('/').pop() }) }; + newVariants[stageKey] = newFilename; + return { ...c, variants: newVariants }; + }); + return { + ...prev, + jobs: { + ...prev.jobs, + [current.fileId]: { + ...job, + result: { ...job.result, clips: newClips }, + }, + }, + }; + }); + } + + async function generateStage(stageKey) { + if (!current) return; + const cKey = clipKey(current); + setPendingStage({ clipKey: cKey, stageKey }); + setStageError(null); + + try { + const inputFilename = priorVariantFilename(variants, stageKey); + if (!inputFilename) throw new Error('No source variant available'); + + let newFilename = null; + + if (stageKey === 'edited') { + if (!keys.gemini) throw new Error('Set your Gemini key in Settings first'); + const res = await fetch(getApiUrl('/api/edit'), { + method: 'POST', + headers: { 'Content-Type': 'application/json', 'X-Gemini-Key': keys.gemini }, + body: JSON.stringify({ + job_id: current.jobId, + clip_index: 
current.clipIndex, + input_filename: inputFilename, + }), + }); + if (!res.ok) throw new Error(await res.text()); + const data = await res.json(); + newFilename = data.new_video_url?.split('/').pop(); + } else if (stageKey === 'graded') { + const res = await fetch(getApiUrl('/api/colorgrade'), { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + job_id: current.jobId, + clip_index: current.clipIndex, + input_filename: inputFilename, + lut_name: lutName, + }), + }); + if (!res.ok) throw new Error(await res.text()); + const data = await res.json(); + newFilename = data.new_video_url?.split('/').pop(); + } else if (stageKey === 'silencecut') { + const res = await fetch(getApiUrl('/api/silencecut'), { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + job_id: current.jobId, + clip_index: current.clipIndex, + input_filename: inputFilename, + }), + }); + if (!res.ok) throw new Error(await res.text()); + const data = await res.json(); + newFilename = data.new_video_url?.split('/').pop(); + } else if (stageKey === 'subtitled') { + const res = await fetch(getApiUrl('/api/subtitle'), { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + job_id: current.jobId, + clip_index: current.clipIndex, + input_filename: inputFilename, + position: brand?.position || 'bottom', + font_size: brand?.font_size || 50, + font_name: brand?.font_name || 'Anton', + font_color: brand?.font_color || '#FFFF00', + border_color: brand?.border_color || '#000000', + border_width: brand?.border_width || 4, + bg_opacity: brand?.bg_opacity ?? 
0, + words_per_line: brand?.words_per_line || 3, + text_case: brand?.text_case || 'upper', + }), + }); + if (!res.ok) throw new Error(await res.text()); + const data = await res.json(); + newFilename = data.new_video_url?.split('/').pop(); + } + + if (!newFilename) throw new Error('Empty response from backend'); + mergeVariant(stageKey, newFilename); + // Switch to the freshly-generated stage so the preview swaps immediately. + wizard.setData((prev) => ({ + ...prev, + clipStages: { ...(prev.clipStages || {}), [cKey]: stageKey }, + })); + } catch (e) { + setStageError({ clipKey: cKey, message: String(e.message || e) }); + setTimeout(() => setStageError(null), 6000); + } finally { + setPendingStage(null); + } + } + + async function regenerateGrade(newLut) { + if (!current) return; + setClipLut(newLut); + // If we already have a graded variant, regenerate with the new LUT. + if (variants?.graded) { + setPendingStage({ clipKey: clipKey(current), stageKey: 'graded' }); + setStageError(null); + try { + const inputFilename = priorVariantFilename(variants, 'graded'); + const res = await fetch(getApiUrl('/api/colorgrade'), { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + job_id: current.jobId, + clip_index: current.clipIndex, + input_filename: inputFilename, + lut_name: newLut, + }), + }); + if (!res.ok) throw new Error(await res.text()); + const data = await res.json(); + const newFilename = data.new_video_url?.split('/').pop(); + if (newFilename) mergeVariant('graded', newFilename); + } catch (e) { + setStageError({ clipKey: clipKey(current), message: String(e.message || e) }); + setTimeout(() => setStageError(null), 6000); + } finally { + setPendingStage(null); + } + } + } + function publish(platform, scheduled) { if (!current) return; pushNotification({ @@ -90,6 +322,9 @@ export default function Review({ wizard }) { current?.clip?.description || ''; + const isPending = pendingStage?.clipKey === currentClipKey; + const 
isErr = stageError?.clipKey === currentClipKey; + return ( <div className="h-full flex"> <aside className="w-[230px] shrink-0 border-r border-border bg-background overflow-y-auto custom-scrollbar p-3 space-y-1"> @@ -141,12 +376,78 @@ export default function Review({ wizard }) { {showOriginal && sourceUrl ? ( <video key={`src-${selected}`} src={sourceUrl} controls className="w-full h-full object-contain" /> ) : clipUrl ? ( - <video key={`clip-${selected}`} src={clipUrl} controls className="w-full h-full object-cover" /> + <video key={`clip-${selected}-${selectedStage}`} src={clipUrl} controls className="w-full h-full object-cover" /> ) : ( <div className="text-zinc-600 text-[12px] p-4 text-center">No preview available.</div> )} </PhoneFrame> + {/* Stage selector — segmented row. Lights up the currently-displayed + variant and exposes a [+] on each missing stage so the user can + fill it in without leaving Review. */} + {!showOriginal && current && ( + <div className="flex flex-col items-center gap-2"> + <div className="inline-flex rounded-lg border border-border bg-surface p-0.5 text-[12px]"> + {STAGES.map((stage) => { + const has = !!variants?.[stage.key]; + const isActive = stage.key === selectedStage; + const isThisPending = isPending && pendingStage.stageKey === stage.key; + const clickable = has || !isThisPending; + return ( + <button + key={stage.key} + disabled={!clickable} + onClick={() => has ? setClipStage(stage.key) : generateStage(stage.key)} + title={has + ? `Show ${stage.label.replace(/^\+ /, '').toLowerCase()} variant` + : `Generate ${stage.label.replace(/^\+ /, '').toLowerCase()}`} + className={`px-3 py-1.5 rounded-md transition-colors flex items-center gap-1.5 disabled:opacity-50 disabled:cursor-not-allowed ${ + isActive + ? 'bg-primary/20 text-white border border-primary/40' + : has + ? 'text-zinc-300 hover:bg-white/5' + : 'text-zinc-500 hover:text-zinc-300 hover:bg-white/5' + }`} + > + {isThisPending ? 
( + <Loader2 size={12} className="animate-spin" /> + ) : !has ? ( + <Plus size={11} /> + ) : null} + <span>{stage.short}</span> + </button> + ); + })} + </div> + + {/* LUT picker — only relevant when the user is viewing or about + to generate the graded stage. Sliding the dropdown re-grades + in place if a graded variant already exists. */} + {(selectedStage === 'graded' || (!variants?.graded && pendingStage?.stageKey === 'graded')) && ( + <div className="flex items-center gap-2 text-[11px] text-zinc-400"> + <label htmlFor="lut-picker">LUT:</label> + <select + id="lut-picker" + value={lutName} + onChange={(e) => regenerateGrade(e.target.value)} + disabled={isPending} + className="bg-surface border border-border rounded-md px-2 py-1 text-zinc-200 text-[11px] disabled:opacity-50" + > + {LUTS.map((l) => ( + <option key={l} value={l}>{l.replace('_', ' ')}</option> + ))} + </select> + </div> + )} + + {isErr && ( + <div className="text-[11px] text-red-400 max-w-md text-center" role="alert"> + {stageError.message} + </div> + )} + </div> + )} + {title && ( <div className="text-center max-w-md"> <div className="text-[13px] text-white font-medium">{title}</div> From 3b52cd67ec4a15a7d23551c15717b2363d798a7b Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Wed, 20 May 2026 14:39:11 -0400 Subject: [PATCH 35/43] docs(ai-restyle): design spec for new sidebar product MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AI Restyle is a new sibling to Short-form / Long-form. User uploads a video they made; product relights and re-backgrounds it using a Nano Banana frame as the style reference for a video-to-video model, preserving the original motion, content, and audio. 
v1 scope: - Sidebar entry between Long-form and Short-form - 3-step wizard: Upload → Configure → Review - Two preset dimensions (Background + Lighting), 5 hand-tuned seed presets each, CRUD in Settings → "AI Restyle" tab - Per-job prompt override via inline textarea - 30s duration cap - Original audio preserved bit-for-bit - No editing/subs/color-grade (those belong to Short-form; "Send to Short-form" CTA closes the loop) Video-to-video model choice deferred to implementation Phase 0 spike (Wan v2.5 / Luma Ray2 / Runway Gen-3 candidates; cost ≤$2 per 30s). 10 explicit decisions documented (D1-D10), including the 30s cap rationale, the no-editing scope choice, and the Phase 0 model-selection deferral. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- .../specs/2026-05-20-ai-restyle-design.md | 471 ++++++++++++++++++ 1 file changed, 471 insertions(+) create mode 100644 docs/superpowers/specs/2026-05-20-ai-restyle-design.md diff --git a/docs/superpowers/specs/2026-05-20-ai-restyle-design.md b/docs/superpowers/specs/2026-05-20-ai-restyle-design.md new file mode 100644 index 00000000..4714feb4 --- /dev/null +++ b/docs/superpowers/specs/2026-05-20-ai-restyle-design.md @@ -0,0 +1,471 @@ +# Design — AI Restyle (new sidebar product) + +**Status:** Approved by user 2026-05-20. Implementation plan pending. +**Working name:** AI Restyle (sidebar label "AI Restyle"). Internal product ID: `ai-restyle`. + +--- + +## 1. Goal & non-goals + +### Goal + +Let users upload a video they've already produced and get back the **same video with a transformed background and lighting**. Same person, same words, same motion, same cuts, same duration — restyled to look like it was shot in the user-chosen location with the user-chosen lighting setup. + +Concretely: a hand-held phone clip filmed in a dim living room becomes a clip that looks like it was filmed on a Bahamas beach at golden hour, or in a clean white studio with softbox lighting. 
+ +### Non-goals (v1) + +This product **does not**: +- Run viral extraction or pick "best moments" from a long source (that's Short-form's job). +- Apply subtitles, AI edits (zoompan/cuts), color grade, or silence removal. These are deliberately out of scope. Users export the restyled output and feed it into Short-form if they want polish. +- Generate net-new video content (no Kling/Sora image-to-video). The output content is the input content; only style changes. +- Handle videos >30s. v1 caps at 30 seconds (single video-to-video call). Longer durations come in a separate milestone (see §10 Future Work). +- Replace the user's audio (original audio is preserved bit-for-bit). +- Generate net-new audio (TTS, music, SFX). + +--- + +## 2. User flow + +``` +Sidebar: AI Restyle (between Long-form and Short-form) + │ + ▼ +┌─────────────┐ ┌─────────────┐ ┌──────────────┐ +│ 1. Upload │ → │ 2. Configure│ → │ 3. Review │ +│ (≤30s MP4) │ │ (presets + │ │ (before/after│ +│ │ │ prompt) │ │ + Send to │ +│ │ │ │ │ Short-form) │ +└─────────────┘ └─────────────┘ └──────────────┘ + │ + ▼ + Optional: Send-to-Short-form + hands the file to /short-form's + Upload step, pre-loaded. +``` + +### Step 1 — Upload + +- Single drop zone. MP4/MOV. ≤2GB. ≤30s **(client-side `<video>.duration` probe BEFORE upload; server re-verifies via ffprobe and rejects with HTTP 413)**. +- Rejection message for too-long videos: *"AI Restyle v1 caps at 30s. Trim your video first or use Short-form."* +- Reuses `Upload.jsx`'s MIME + ftyp validation logic. +- CTA: `Continue →` disabled until file passes validation. + +### Step 2 — Configure + +- Two dropdowns side by side: **Background** + **Lighting**. Each lists user's presets (read from `localStorage`). +- Selected preset's prompt text is rendered in an **editable textarea** beneath the dropdowns. Editing this box overrides the preset prompt *for this job only* — the saved preset is unchanged. 
+- The "effective prompt" sent to Nano Banana = `background_text + " • " + lighting_text + " • " + safety_constraints` (safety constraints are hard-coded server-side). +- CTA: `Start restyle →`. + +### Step 3 — Review + +- Phone-frame preview of the restyled clip. +- **Before/After** toggle (Original vs Restyled) — same UX as Short-form `Review.jsx`. +- **Download** link to the restyled MP4. +- **Send to Short-form** primary CTA. Stashes the restyled file into a routing payload that `/short-form` reads on mount and pre-loads into its Upload step. Closes the loop with the existing pipeline. +- No editing controls (no stage selector, no LUT picker — those belong to Short-form). + +--- + +## 3. Backend architecture + +### Route module + +**New file:** `backend/app/routes/ai_restyle.py` — first populated file in `backend/app/routes/`. Establishes the pattern future router-split work will follow. + +```python +POST /api/restyle + Form fields: + file: UploadFile # MP4/MOV, ≤30s, ≤2GB + background_prompt: str # Effective background text (≤500 chars) + lighting_prompt: str # Effective lighting text (≤500 chars) + Headers: + X-Gemini-Key: str # required (Nano Banana) + X-Fal-Key: str # required (video-to-video) + Returns: + { "job_id": "<uuid>" } + +GET /api/restyle/{job_id} + Returns: + { "status": "processing" | "completed" | "failed", + "logs": [str, ...], + "progress_pct": 0..100, # coarse: 0/10/40/85/100 + "result": { "video_url": "/videos/{job_id}/restyled_{filename}.mp4", + "original_url": "/videos/{job_id}/{filename}", + "duration_sec": float } | null } +``` + +`POST /api/restyle` and `GET /api/restyle/{job_id}` are wired into `backend/app/main.py`'s router list in the same place the existing routes are registered. + +Job state lives in the same `jobs` dict that `main.py` already manages, with `status` semantics identical to Short-form (`processing → completed | failed`). The frontend reuses `useJobPolling.js` unchanged. 
+ +### Pipeline orchestrator + +**New file:** `backend/app/saas/restyle_pipeline.py`. (Lives under `saas/` because it's the same architectural neighbor as the existing SaaSShorts pipeline — both are multi-stage ML orchestrators that compose Gemini + fal.ai + FFmpeg. Long-term it may move to its own package; out of scope for v1.) + +Single public function: + +```python +async def run_restyle_job( + job_id: str, + input_path: str, + background_prompt: str, + lighting_prompt: str, + gemini_key: str, + fal_key: str, +) -> None: + """Orchestrate the 7-step restyle pipeline. Updates jobs[job_id] in place.""" +``` + +### Pipeline steps + +| # | Step | Module | Failure mode | +|---|---|---|---| +| 1 | Validate upload (MIME + ftyp + duration ≤30s) | `routes/ai_restyle.py` | HTTP 415 / 413 | +| 2 | Probe duration via ffprobe | `video/ffmpeg.py:probe_duration` (exists) | 500 | +| 3 | Extract first frame to PNG | `ml/frame_extract.py` (NEW) | 500, log + fail job | +| 4 | Nano Banana relight | `ml/frame_relight.py` (NEW) | 500, log + fail job | +| 5 | Video-to-video restyle | `ml/video_restyle.py` (NEW) | 500, log + fail job | +| 6 | Mux original audio | `video/ffmpeg.py:mux_video_audio` (exists) | 500, log + fail job | +| 7 | Write metadata.json + serve | `routes/ai_restyle.py` | 500 | + +All FFmpeg operations route through `backend/app/video/ffmpeg.py` (Convention #1). + +### New ML modules + +#### `backend/app/ml/frame_extract.py` + +```python +def extract_first_frame(video_path: str, out_path: str) -> str: + """Extract frame at t=0 to PNG. Returns out_path on success.""" +``` + +One-line FFmpeg call (`-ss 0 -frames:v 1 -y`). ~30 lines including docstring + error handling. + +#### `backend/app/ml/frame_relight.py` + +```python +def relight_frame( + api_key: str, + frame_path: str, + background_prompt: str, + lighting_prompt: str, + out_path: str, +) -> str: + """Call gemini-2.5-flash-image-preview with the frame + relight prompts. 
+ Returns out_path on success.""" +``` + +Mirrors the existing pattern in `backend/app/thumbnails/images.py:generate_thumbnail` (already calls the same model). The effective prompt template: + +``` +Relight this image with the following style. Keep the person, pose, +clothing, and composition EXACTLY as in the source. Only change the +background and lighting. + +Background: {background_prompt} +Lighting: {lighting_prompt} + +Do not add or remove any people or objects. Do not change facial features +or body proportions. Preserve the framing and camera angle. +``` + +Safety constraints (the "keep person/pose/clothing" and "no add/remove people" clauses) are hard-coded in this module, not user-controllable. + +#### `backend/app/ml/video_restyle.py` + +```python +def restyle_video( + api_key: str, + video_path: str, + reference_frame_path: str, + out_path: str, +) -> str: + """Call fal.ai video-to-video with the source video + reference frame. + Returns out_path on success.""" +``` + +**Model selection deferred to implementation Phase 0** — needs a quick research spike against fal.ai's current catalog (as of 2026-05) to pick between Wan v2.5 video-to-video, Luma Ray2 reference-conditioned, Runway Gen-3 Alpha Turbo v2v (if available on fal.ai), or an alternative. Acceptance criteria for the model: +- Accepts a source video (≤30s) AND a reference image. +- Restyles video to match reference's lighting + background while preserving motion + content. +- Cost: ≤$2 per 30s gen. +- Latency: ≤5min per 30s gen. + +If no fal.ai-hosted model meets the bar, fallback is direct Runway API integration (Runway has a v2v product). This is the highest-risk decision in the entire plan and the implementation plan's Phase 0 is dedicated to validating it. + +--- + +## 4. Frontend architecture + +### Pages directory + +**New folder:** `frontend/src/pages/AIRestyle/` (sibling to `LongForm/` and `ShortForm/`). 
Per Convention #7, this folder does **not** cross-import from `LongForm/` or `ShortForm/`. Shared code goes through `hooks/`, `components/ui/`, `state/`, or `lib/`. + +Files: + +``` +frontend/src/pages/AIRestyle/ +├── index.jsx # Routes + Wizard mount + History tab +├── Wizard.jsx # 3-step wizard wrapper (mirrors ShortForm/Wizard.jsx) +├── History.jsx # Past restyle jobs (read from localStorage, last 20) +└── steps/ + ├── Upload.jsx + ├── Configure.jsx + └── Review.jsx +``` + +### State + +Wizard state shape (persisted to `localStorage` via `useWizard`): + +```js +{ + step: 0, + data: { + file: { id, name, size, durationSec, file: File } | null, + selection: { + backgroundPresetId: string, // FK into presetsStore + lightingPresetId: string, + backgroundPromptOverride: string | null, // if user edited the textarea + lightingPromptOverride: string | null, + }, + job: { + jobId: string, + status: 'idle' | 'processing' | 'completed' | 'failed', + result: { video_url, original_url, duration_sec } | null, + progressPct: number, + logs: string[], + } | null, + }, +} +``` + +`File` handle is lost on reload — same constraint as Short-form. The `resetOnRehydrate` guard mirrors `ShortForm/Wizard.jsx`'s shortFormNeedsFreshUpload pattern. + +### Sidebar entry + +**Modified file:** `frontend/src/layouts/Sidebar.jsx`. Add one entry to `NAV` between Long-form and Short-form: + +```js +{ to: '/ai-restyle', label: 'AI Restyle', icon: Wand2 }, +``` + +Icon: `Wand2` from `lucide-react`. + +### Settings tab + +**Modified file:** `frontend/src/pages/Settings/` — adds a new tab "AI Restyle" alongside existing tabs. 
+ +Tab layout: two sections (Backgrounds, Lightings), each showing the user's preset list with row-level actions: + +``` +┌──────────────────────────────────────────────────────────────┐ +│ ★ Studio white [Edit] [Delete*] │ +│ clean white seamless backdrop, minimalist photo studio │ +├──────────────────────────────────────────────────────────────┤ +│ Sunlit office [Edit] [Delete] [★] │ +│ sunlit modern office with floor-to-ceiling windows… │ +├──────────────────────────────────────────────────────────────┤ +│ + Add background preset │ +└──────────────────────────────────────────────────────────────┘ + *Disabled for starred default +``` + +- **★** marks one preset per dimension as "Recommended/Default" (pre-selected in the wizard's Configure step). Clicking [★] on another preset moves the star atomically. +- **[Edit]** opens a modal with two fields: `name` (≤40 chars) + `prompt` (≤500 chars). Cancel discards. +- **[Delete]** is disabled for the starred default (prevents the dropdown from going empty). +- **+ Add preset** opens the same modal as Edit, with empty fields. + +### Preset storage + +**New file:** `frontend/src/state/aiRestylePresets.js`. Mirrors the pattern in `keysStore.js` and `lib/brandKit.js`: + +- LocalStorage key: `openshorts.aiRestyle.presets` +- Custom event for cross-component reactivity: `openshorts:ai-restyle-presets-changed` +- Hook: `useAIRestylePresets()` returns `{ backgrounds: [], lightings: [], setBackgrounds, setLightings, setDefault }`. 
+ +### Seed presets (first-load defaults) + +| Backgrounds | Prompt fragment | +|---|---| +| ★ Studio white | clean white seamless backdrop, minimalist photo studio, no clutter, perfect color separation | +| Sunlit office | bright modern office interior with floor-to-ceiling windows, soft natural light, plants, wooden desk | +| Bahamas beach | tropical beach with palm trees, turquoise ocean water in the distance, soft white sand | +| Cyberpunk neon | nighttime city street with vivid neon signs, pink-and-cyan color palette, light fog | +| Cinematic forest | deep forest with dappled sunlight through tall pine trees, mossy ground, atmospheric haze | + +| Lightings | Prompt fragment | +|---|---| +| ★ Studio softbox | soft diffused studio softbox lighting from camera-left, gentle fill on the right, no harsh shadows | +| Sunlit office | bright daylight pouring through large windows, soft fill on subject's face | +| Golden hour | warm golden-hour sun low and to the side, long shadows, amber and rose tones | +| Cinematic moody | low-key cinematic lighting with strong directional key, deep shadows, single soft fill | +| Neon nighttime | colored neon spill lighting (pink and cyan accents), low ambient, subject lit from multiple sides | + +These 10 presets ship with the build; first-load seeding writes them to `localStorage` only if no preset list exists yet. Existing users with custom presets are not overwritten. + +--- + +## 5. Cross-cutting concerns + +### Security baseline (per global CLAUDE.md `securing-http-and-llm-endpoints` skill) + +`POST /api/restyle` classifies as **STATE-MUTATING + LLM-CALL** (calls Gemini for Nano Banana + fal.ai for v2v). 
Required controls per the skill's tier matrix, with status: + +| Control | Status | Notes | +|---|---|---| +| C1 Auth (BYO API key via header) | ✓ Inherited | `X-Gemini-Key`, `X-Fal-Key` headers | +| C2 Rate limit | DEFER | Same opt-out as HANDOFF.md §5; lands in `/gsd-secure-phase` sweep | +| C3 Input validation | ✓ Required at impl time | Pydantic + duration cap + prompt length cap | +| C4 Timeout/retry/breaker | DEFER | Same opt-out; fal.ai client already has internal retry | +| C5 Output rate limit | N/A | Single-file response | +| C6 PII redaction | N/A | No structured PII in logs | +| C7 Idempotency | DEFER | Same opt-out | +| C8 Concurrency lock | N/A | No shared mutable resource | +| C9 Audit logging | ✓ Required at impl time | Job logs already capture; add cost line per Gemini/fal call | +| C10 Cost / abuse cap | DEFER | Same opt-out | + +Implementation plan must add **C3 (Pydantic + duration cap + prompt length cap)** and **C9 (per-call cost logging)** as in-scope tasks. C2/C4/C7/C10 wait for the cross-router sweep. + +### Cost telemetry + +Each Nano Banana + fal.ai call appends a line to the job logs: + +``` +💰 Nano Banana relight: $0.039 (1 image) +💰 fal.ai v2v: $1.20 (30s × $0.04/s — model: <name>) +💰 Total: $1.24 +``` + +Total cost surfaces in the Review step as a small footer beneath the Download button. + +### Failure handling + +- **Nano Banana fails** (content policy / network): job fails with `status='failed'`, logs include the API error message. No partial output. +- **Video-to-video fails** (timeout, content policy, model overload): same. +- **Audio mux fails** (rare): retry once with `-c:a aac` fallback before failing the job. +- **Job times out** (>15min): wizard surfaces a "Job stuck — refresh to retry" message. Backend marks `status='failed'` after 15min. + +The frontend never crashes on a failed job — Review step renders the failure logs and offers `Try again` (returns to Configure with the same selection) or `Start over` (back to Upload). 
+ +### Tests + +Backend (added to `backend/tests/`): +- `unit/test_frame_extract.py` — fixture MP4 → first frame → asserts PNG file exists + dimensions ≥320x320. +- `unit/test_frame_relight.py` — mocks Gemini client, asserts prompt-template formatting + retry-on-content-policy fallback. +- `unit/test_video_restyle.py` — mocks fal.ai client, asserts payload shape + cost telemetry log line. +- `unit/test_restyle_pipeline.py` — runs the orchestrator end-to-end with all three ML modules mocked; asserts job logs cycle through all 7 steps + status flips to `completed`. +- `api/test_openapi_contract.py` — snapshot picks up `/api/restyle` + `/api/restyle/{job_id}`. + +Frontend: `npm run build` 0 warnings + browser smoke test per HANDOFF.md §6 rule 6. + +--- + +## 6. Files added / modified + +### Added (backend) + +- `backend/app/routes/__init__.py` — empty (creates the package) +- `backend/app/routes/ai_restyle.py` — FastAPI router (≈150 lines) +- `backend/app/saas/restyle_pipeline.py` — orchestrator (≈120 lines) +- `backend/app/ml/frame_extract.py` — ≈30 lines +- `backend/app/ml/frame_relight.py` — ≈80 lines +- `backend/app/ml/video_restyle.py` — ≈100 lines (model TBD) + +### Added (frontend) + +- `frontend/src/pages/AIRestyle/index.jsx` +- `frontend/src/pages/AIRestyle/Wizard.jsx` +- `frontend/src/pages/AIRestyle/History.jsx` +- `frontend/src/pages/AIRestyle/steps/Upload.jsx` +- `frontend/src/pages/AIRestyle/steps/Configure.jsx` +- `frontend/src/pages/AIRestyle/steps/Review.jsx` +- `frontend/src/state/aiRestylePresets.js` + +### Added (tests) + +- `backend/tests/unit/test_frame_extract.py` +- `backend/tests/unit/test_frame_relight.py` +- `backend/tests/unit/test_video_restyle.py` +- `backend/tests/unit/test_restyle_pipeline.py` + +### Modified + +- `backend/app/main.py` — register the new router (one-line `app.include_router(ai_restyle.router)`) +- `backend/tests/snapshots/baseline.openapi.json` — regenerate after route is wired +- `frontend/src/App.jsx` — add `<Route 
path="ai-restyle/*" element={<AIRestyle />} />` +- `frontend/src/layouts/Sidebar.jsx` — add the NAV entry +- `frontend/src/pages/Settings/sections/AIRestylePresetsSection.jsx` (new) — adds the "AI Restyle" tab to the existing sections pattern (`ApiKeysSection`, `BrandKitSection`, etc.) +- `frontend/src/pages/Settings/index.jsx` — register the new section in the tab list +- `ROADMAP.md` — promote "Planned product: AI Restyle" from later to shipped/in-progress +- `~/.claude/CLAUDE.md` — `## OpenShorts (project-specific)` repo-map / module-map auto-managed sections regenerate after backend modules land + +--- + +## 7. Implementation milestones (preview — full plan via `writing-plans`) + +| Phase | Scope | Estimate | +|---|---|---| +| **0** | Research spike: validate the video-to-video model on fal.ai. Run 3-5 test gens; verify quality + cost + latency hit the bar. Pick the model. | 0.5–1 day | +| **1** | Backend `ml/frame_extract.py` + `ml/frame_relight.py` + 2 unit tests. Verify Nano Banana relight quality against 3 hand-picked source frames. | 1 day | +| **2** | Backend `ml/video_restyle.py` + `saas/restyle_pipeline.py` + 2 unit tests + manual end-to-end with a single fixture clip. | 1.5 days | +| **3** | Backend `routes/ai_restyle.py` + OpenAPI snapshot regen + pytest gate green. | 0.5 day | +| **4** | Frontend `pages/AIRestyle/` (3-step wizard) + sidebar entry + routing. | 1.5 days | +| **5** | Frontend Settings tab + `aiRestylePresets.js` + seed defaults. | 1 day | +| **6** | Browser smoke test (per HANDOFF.md §6 rule 6) + Codex adversarial review per global CLAUDE.md + commit + ship. | 0.5 day | + +**Total estimate:** 6–7 working days, gated on Phase 0 validating the model. If Phase 0 fails (no fal.ai v2v model meets the bar), pivoting to Runway direct integration adds ~1 day. + +--- + +## 8. 
Roadmap entry + +Promotes the entry already saved as project memory (`project_ai_short_form.md`) into `ROADMAP.md` under a new section: + +``` +### AI Restyle (new product, v1 in progress) + +**Stubbed in v1** +- Restyle a video's lighting + background while preserving content, motion, and audio. +- Sidebar entry between Long-form and Short-form. +- 3-step wizard: Upload → Configure → Review. +- Settings tab "AI Restyle" with CRUD for Background + Lighting preset prompts. +- Cap: 30s per video. + +**Later** +- Lift the 30s cap via chunked v2v with shared reference frame (Approach B from design). +- Bridge from Short-form Review's stage selector ("+ AI Restyle" stage). +- Auto-suggest preset based on the source frame (Gemini-driven). +``` + +--- + +## 9. Decisions + +| ID | Decision | Rationale | +|---|---|---| +| **D1** | v1 caps at 30s. | Single video-to-video call. Predictable cost (~$1-2). No stitch artifacts. Validates the model choice cheaply before committing to chunking. | +| **D2** | No editing chain in this product. | User's explicit ask. Restyled output can be fed into Short-form for polish. Keeps this product's value proposition crisp. | +| **D3** | Presets in `localStorage`, not backend. | Same pattern as `keysStore` + `brandKit`. No backend persistence needed. User-editable in Settings without an API round-trip. | +| **D4** | Two preset dimensions (background + lighting), not one combined preset. | Users want to mix-and-match: "Bahamas beach" × "Cinematic moody" is a different mood than "Bahamas beach" × "Golden hour". Two dropdowns is 5×5=25 combos with 10 stored fragments — much higher leverage than 25 stored combos. | +| **D5** | Per-job prompt override via textarea. | Lets power users iterate without polluting their saved presets. Editing the textarea overrides for THIS job only. | +| **D6** | Model choice deferred to Phase 0. | High-uncertainty — fal.ai's v2v catalog evolves quickly. A 0.5-day spike early de-risks the whole plan. 
If no fal.ai option meets the bar, fallback is Runway direct API (~1 extra day). | +| **D7** | Reuse `backend/app/saas/` neighbor for pipeline orchestration. | Architectural fit: SaaSShorts is the existing multi-stage Gemini+fal.ai+FFmpeg orchestrator. Restyle is structurally identical. Moving to its own package can come later. | +| **D8** | Audio is original audio, untouched. | Subtitles in Short-form rely on audio-timestamp alignment with the transcript. Re-encoding or replacing audio would break that downstream contract. | +| **D9** | First file populated under `backend/app/routes/`. | The router-split refactor (HANDOFF.md §5) has been deferred indefinitely. AI Restyle landing here as the first inhabitant sets the precedent. Future router-split work moves existing routes to neighbors. | +| **D10** | Defer C2 / C4 / C7 / C10 security controls. | Inherited from existing per-route opt-outs (Phase 1 D3, Phase 2 D4 in the polish plan). Cross-router sweep lands in `/gsd-secure-phase`. | + +--- + +## 10. Future work (explicitly out of scope for v1) + +- **Long-form support (>30s up to 3min)** — requires chunk-and-stitch logic with shared reference frame. Plan separately as "AI Restyle: long-form". +- **Voiceover / TTS** — for AI ads use case. Probably belongs in a different product (e.g. SaaSShorts) than AI Restyle. +- **Multi-reference frames** — pick a reference frame per scene boundary instead of just the first frame. Improves quality for source videos with hard cuts. +- **Preset marketplace / sharing** — currently presets are per-browser localStorage. Backend-stored presets with team sharing is a v3 feature. +- **In-line preset preview** — show a Nano-Banana-relit thumbnail of the source's first frame in the Configure step before committing to the full v2v call. Saves cost when users iterate. 
+ +--- + +*End of design.* From 917ba9987ce6cde0fc8b0220df02fe35fcf1b4a5 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Wed, 20 May 2026 14:47:09 -0400 Subject: [PATCH 36/43] docs(ai-restyle): implementation plan + roadmap entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plan breaks the 7-phase milestone into bite-sized TDD tasks: Phase 0 model spike, Phase 1 frame_extract + frame_relight, Phase 2 video_restyle + restyle_pipeline orchestrator, Phase 3 routes + OpenAPI snapshot, Phase 4 preset store + wizard pages, Phase 5 Settings tab CRUD, Phase 6 smoke test + Codex review + ROADMAP/CLAUDE.md refresh. Each task is 2-5 minutes (test → run-fail → impl → run-pass → commit) per the writing-plans skill conventions. ROADMAP.md gets a new top-of-product-roadmap section pointing at both the spec and the plan, with the v1 scope (30s cap, 3-step wizard, preset CRUD) and the explicit out-of-scope items (>30s chunking, ShortForm bridge, AI preset suggestion). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- ROADMAP.md | 27 + .../plans/2026-05-20-ai-restyle.md | 2282 +++++++++++++++++ 2 files changed, 2309 insertions(+) create mode 100644 docs/superpowers/plans/2026-05-20-ai-restyle.md diff --git a/ROADMAP.md b/ROADMAP.md index 63e10493..02fa5147 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -15,6 +15,33 @@ Tiering: - **Stubbed in v1** — UI is in place; the backend feature is a no-op, placeholder, or partial loop. Each item lists the backend TODO that unblocks it. - **Later** — not started. +### AI Restyle (planned — new sidebar product) + +A new sibling to Short-form / Long-form. Upload a video you've already produced; AI Restyle relights the first frame via Nano Banana (Gemini 2.5 Flash image preview) and uses that frame as the style reference for a video-to-video model — preserving the original motion, content, and audio while restyling lighting + background. 
+ +**Spec:** [`docs/superpowers/specs/2026-05-20-ai-restyle-design.md`](docs/superpowers/specs/2026-05-20-ai-restyle-design.md) +**Plan:** [`docs/superpowers/plans/2026-05-20-ai-restyle.md`](docs/superpowers/plans/2026-05-20-ai-restyle.md) + +**Planned (v1 scope, ~6-7 days):** +- Sidebar entry between Long-form and Short-form (icon: `Wand2`) +- 3-step wizard (Upload → Configure → Review) +- Two preset dimensions (Background + Lighting) with 5 hand-tuned seed presets each +- Per-job prompt override via editable textarea +- Settings tab "AI Restyle" with full preset CRUD (star/edit/delete) +- 30s duration cap (single video-to-video call; predictable ~$1-2 cost per clip) +- Original audio preserved bit-for-bit +- "Send to Short-form" CTA closes the loop with the editing pipeline + +**Out of scope for v1 (Later):** +- >30s clips via chunked v2v (3-minute "AI ads" use case) +- Bridge from Short-form Review's stage selector ("+ AI Restyle" stage) +- Auto-suggest preset based on the source frame +- Backend-stored preset sharing / team marketplace + +**Risks:** Video-to-video model choice deferred to Phase 0 spike (Wan v2.5 / Luma Ray2 / Runway Gen-3 Alpha candidates). If no fal.ai model meets the acceptance bar, fallback is direct Runway API integration (~1 extra day). + +--- + ### Short-form wizard **Shipped** diff --git a/docs/superpowers/plans/2026-05-20-ai-restyle.md b/docs/superpowers/plans/2026-05-20-ai-restyle.md new file mode 100644 index 00000000..15bb0b14 --- /dev/null +++ b/docs/superpowers/plans/2026-05-20-ai-restyle.md @@ -0,0 +1,2282 @@ +# AI Restyle Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Ship a new sidebar product "AI Restyle" that uploads a ≤30s video, relights its first frame via Nano Banana (Gemini 2.5 Flash image preview), and uses that frame as a style reference for video-to-video restyling that preserves the original motion + audio. + +**Architecture:** New backend route `/api/restyle` orchestrating a 7-step pipeline (validate → probe → extract first frame → relight → v2v → mux audio → persist). Frontend 3-step wizard (Upload → Configure → Review) plus a Settings tab for preset CRUD. Presets are 2-dimensional (background + lighting) and live in browser localStorage. Full design at `docs/superpowers/specs/2026-05-20-ai-restyle-design.md`. + +**Tech Stack:** Python 3.11 + FastAPI + google-genai + fal_client + FFmpeg (backend); React 18 + Vite + Tailwind + lucide-react (frontend); pytest (backend tests); browser smoke test via chrome-devtools MCP (frontend gate). + +--- + +## Phase 0 — Model selection spike (research, ~0.5 day) + +### Task 0.1: Validate the video-to-video model + +**Files:** +- Create: `docs/superpowers/specs/2026-05-20-ai-restyle-phase0-spike.md` + +This phase produces a research artifact, not code. The spec defers model choice to this spike. + +- [ ] **Step 1: Survey fal.ai catalog** + +Open https://fal.ai/models and filter by "video-to-video". Document candidates that accept a source video AND a reference image (or that can be combined with a separate img2img reference). As of 2026-05 candidates to evaluate (verify each still exists): +- `fal-ai/wan/v2.5/turbo/video-to-video` +- `fal-ai/luma-photon` (Luma's photon family) +- `fal-ai/runway-gen3-alpha-turbo/video-to-video` (if hosted on fal.ai) +- `fal-ai/pixverse/v4/restyle` (if exists) + +- [ ] **Step 2: Pick one candidate; run a 5s test gen** + +Use `demo-openshorts.mp4` (already in repo root) — trim to 5s with `ffmpeg -i demo-openshorts.mp4 -t 5 -c copy /tmp/spike-5s.mp4`. 
Generate a reference frame by hand: run the first frame through https://aistudio.google.com/ with prompt "relight this image with Bahamas beach background and golden hour lighting; keep subject and pose unchanged". Save reference frame to `/tmp/spike-ref.png`. + +Call the candidate model via `fal_client` (requires `FAL_KEY` env var): + +```python +import fal_client +res = fal_client.run( + "fal-ai/<chosen-model>", + arguments={ + "video_url": <upload /tmp/spike-5s.mp4>, + "image_url": <upload /tmp/spike-ref.png>, + "prompt": "match the lighting and background of the reference image", + }, +) +print(res) +``` + +Time the call. Inspect the output video. + +- [ ] **Step 3: Score against acceptance criteria** + +Acceptance bar (from spec §3 `video_restyle.py`): +- Accepts source video AND reference image +- Restyles to match reference's lighting + background +- Preserves motion + content (subject still talks/moves the same) +- Cost: ≤$2 per 30s gen (linear-extrapolate from the 5s test) +- Latency: ≤5min per 30s gen + +If the chosen candidate fails on any criterion, repeat Step 2 with the next candidate. If all fail, escalate to direct Runway API integration (write a new spike doc; pivot the plan). + +- [ ] **Step 4: Write the spike doc** + +Create `docs/superpowers/specs/2026-05-20-ai-restyle-phase0-spike.md` with: +- Models surveyed (table: name + URL + supports v2v + supports ref image) +- Test methodology (which clip, which reference, the prompt) +- Per-model results (quality screenshot, cost, latency) +- **Decision:** chosen model ID, justification +- Output payload shape (the exact response JSON from the chosen model) — required for Phase 2 Task 2.1. 
+ +- [ ] **Step 5: Commit** + +```bash +git add docs/superpowers/specs/2026-05-20-ai-restyle-phase0-spike.md +git commit -m "docs(ai-restyle): Phase 0 spike — picked <model> for video-to-video" +``` + +--- + +## Phase 1 — Backend ML: frame extract + relight (~1 day) + +### Task 1.1: First-frame extractor + +**Files:** +- Create: `backend/app/ml/frame_extract.py` +- Test: `backend/tests/unit/test_frame_extract.py` +- Existing fixture: `demo-openshorts.mp4` (repo root, 5.3 MB) — copy a trimmed version into `backend/tests/fixtures/` (create the dir). + +- [ ] **Step 1: Write the failing test** + +```python +# backend/tests/unit/test_frame_extract.py +"""Tests for ml/frame_extract: extract first frame of a video to PNG.""" +from __future__ import annotations + +import os +from pathlib import Path + +import pytest + +from app.ml.frame_extract import extract_first_frame + + +FIXTURE = Path(__file__).resolve().parent.parent / "fixtures" / "short-clip.mp4" + + +@pytest.fixture(scope="module", autouse=True) +def _ensure_fixture(): + """Trim demo-openshorts.mp4 to 5s on first run so the test is fast.""" + if FIXTURE.exists(): + return + FIXTURE.parent.mkdir(parents=True, exist_ok=True) + repo_root = Path(__file__).resolve().parents[3] + src = repo_root / "demo-openshorts.mp4" + if not src.exists(): + pytest.skip("demo-openshorts.mp4 fixture missing") + import subprocess + subprocess.run( + ["ffmpeg", "-y", "-i", str(src), "-t", "5", "-c", "copy", str(FIXTURE)], + check=True, capture_output=True, + ) + + +def test_extract_first_frame_writes_png(tmp_path): + out = tmp_path / "frame.png" + result = extract_first_frame(str(FIXTURE), str(out)) + assert result == str(out) + assert out.exists() + assert out.stat().st_size > 1000 # not empty + + +def test_extract_first_frame_missing_input(tmp_path): + out = tmp_path / "frame.png" + with pytest.raises(FileNotFoundError): + extract_first_frame(str(tmp_path / "does-not-exist.mp4"), str(out)) +``` + +- [ ] **Step 2: Run test to verify 
it fails** + +```bash +cd backend && pytest tests/unit/test_frame_extract.py -v +``` + +Expected: `ModuleNotFoundError: No module named 'app.ml.frame_extract'` (a subclass of `ImportError` — the module doesn't exist yet) + +- [ ] **Step 3: Implement extract_first_frame** + +```python +# backend/app/ml/frame_extract.py +"""Extract the first video frame to a PNG file via FFmpeg.""" +from __future__ import annotations + +import os + +from app.video.ffmpeg import run as ffmpeg_run, FFmpegError + + +def extract_first_frame(video_path: str, out_path: str) -> str: + """Write the frame at t=0 of ``video_path`` to ``out_path`` as PNG. + + Returns ``out_path`` on success. Raises ``FileNotFoundError`` if the + source is missing; ``FFmpegError`` if encoding fails. + """ + if not os.path.exists(video_path): + raise FileNotFoundError(f"Input video not found: {video_path}") + os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True) + ffmpeg_run( + ["-y", "-ss", "0", "-i", video_path, "-frames:v", "1", "-update", "1", out_path], + ) + return out_path +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +cd backend && pytest tests/unit/test_frame_extract.py -v +``` + +Expected: 2 passed. + +- [ ] **Step 5: Commit** + +```bash +cd "$(git rev-parse --show-toplevel)" # repo root, wherever the checkout lives +git add backend/app/ml/frame_extract.py backend/tests/unit/test_frame_extract.py +git commit -m "feat(ai-restyle): ml/frame_extract first-frame extractor" +``` + +--- + +### Task 1.2: Nano Banana relight + +**Files:** +- Create: `backend/app/ml/frame_relight.py` +- Test: `backend/tests/unit/test_frame_relight.py` +- Reference pattern: `backend/app/thumbnails/images.py:generate_thumbnail` (same model, similar shape) + +- [ ] **Step 1: Write the failing test** + +```python +# backend/tests/unit/test_frame_relight.py +"""Tests for ml/frame_relight: Nano Banana relight call. 
+ +Gemini client is mocked — we don't want network calls or API costs in unit +tests. We do verify the prompt template structure, the model name, and +the file write contract. +""" +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from app.ml.frame_relight import ( + SAFETY_CONSTRAINTS, + build_relight_prompt, + relight_frame, +) + + +def test_build_relight_prompt_contains_inputs(): + p = build_relight_prompt("bahamas beach", "golden hour") + assert "bahamas beach" in p.lower() + assert "golden hour" in p.lower() + + +def test_build_relight_prompt_contains_safety_constraints(): + p = build_relight_prompt("x", "y") + for clause in SAFETY_CONSTRAINTS: + assert clause in p + + +def test_relight_frame_calls_gemini_image_preview_model(tmp_path): + src = tmp_path / "src.png" + src.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 100) + out = tmp_path / "out.png" + + fake_client = MagicMock() + fake_resp = MagicMock() + fake_part = MagicMock() + fake_part.inline_data = MagicMock(data=b"\x89PNG\r\n\x1a\n" + b"\x00" * 200) + fake_resp.candidates = [MagicMock(content=MagicMock(parts=[fake_part]))] + fake_client.models.generate_content.return_value = fake_resp + + with patch("app.ml.frame_relight.genai.Client", return_value=fake_client): + result = relight_frame( + api_key="fake-key", + frame_path=str(src), + background_prompt="bahamas beach", + lighting_prompt="golden hour", + out_path=str(out), + ) + + assert result == str(out) + assert out.exists() + call = fake_client.models.generate_content.call_args + assert call.kwargs["model"] == "gemini-2.5-flash-image-preview" + + +def test_relight_frame_missing_input(tmp_path): + out = tmp_path / "out.png" + with pytest.raises(FileNotFoundError): + relight_frame( + api_key="x", + frame_path=str(tmp_path / "missing.png"), + background_prompt="x", + lighting_prompt="y", + out_path=str(out), + ) + + +def test_relight_frame_handles_no_inline_data(tmp_path): + 
"""Model sometimes returns text instead of image — must raise, not silently write nothing.""" + src = tmp_path / "src.png" + src.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 100) + out = tmp_path / "out.png" + + fake_client = MagicMock() + fake_resp = MagicMock() + fake_part = MagicMock(inline_data=None, text="sorry, can't comply") + fake_resp.candidates = [MagicMock(content=MagicMock(parts=[fake_part]))] + fake_client.models.generate_content.return_value = fake_resp + + with patch("app.ml.frame_relight.genai.Client", return_value=fake_client): + with pytest.raises(RuntimeError, match="no image"): + relight_frame( + api_key="x", + frame_path=str(src), + background_prompt="x", + lighting_prompt="y", + out_path=str(out), + ) +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +cd backend && pytest tests/unit/test_frame_relight.py -v +``` + +Expected: `ImportError: cannot import name 'relight_frame'` (module doesn't exist). + +- [ ] **Step 3: Implement frame_relight** + +```python +# backend/app/ml/frame_relight.py +"""Nano Banana relight: send a frame + relight prompts to Gemini's image +preview model and write the relit frame to disk. + +Mirrors the call pattern in ``backend/app/thumbnails/images.py:generate_thumbnail``. +""" +from __future__ import annotations + +import os +from typing import List + +from google import genai +from google.genai import types + +MODEL_NAME = "gemini-2.5-flash-image-preview" + +SAFETY_CONSTRAINTS: List[str] = [ + "Keep the person, pose, clothing, and composition EXACTLY as in the source.", + "Do not add or remove any people or objects.", + "Do not change facial features or body proportions.", + "Preserve the framing and camera angle.", +] + + +def build_relight_prompt(background_prompt: str, lighting_prompt: str) -> str: + """Compose the Nano Banana prompt from user-controlled fragments + safety.""" + safety_block = "\n".join(f"- {c}" for c in SAFETY_CONSTRAINTS) + return ( + "Relight this image with the following style. 
Only change the " + "background and lighting.\n\n" + f"Background: {background_prompt}\n" + f"Lighting: {lighting_prompt}\n\n" + "Constraints:\n" + f"{safety_block}" + ) + + +def relight_frame( + api_key: str, + frame_path: str, + background_prompt: str, + lighting_prompt: str, + out_path: str, +) -> str: + """Call Nano Banana with the input frame + prompts. Writes relit PNG to ``out_path``. + + Returns ``out_path``. Raises ``FileNotFoundError`` if input missing, + ``RuntimeError`` if the response carries no image data. + """ + if not os.path.exists(frame_path): + raise FileNotFoundError(f"Input frame not found: {frame_path}") + os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True) + + with open(frame_path, "rb") as f: + image_bytes = f.read() + + client = genai.Client(api_key=api_key) + prompt = build_relight_prompt(background_prompt, lighting_prompt) + + response = client.models.generate_content( + model=MODEL_NAME, + contents=[ + types.Part.from_bytes(data=image_bytes, mime_type="image/png"), + prompt, + ], + ) + + for part in response.candidates[0].content.parts: + if getattr(part, "inline_data", None) and part.inline_data.data: + with open(out_path, "wb") as f: + f.write(part.inline_data.data) + return out_path + + raise RuntimeError("Nano Banana returned no image (likely content policy)") +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +cd backend && pytest tests/unit/test_frame_relight.py -v +``` + +Expected: 5 passed. + +- [ ] **Step 5: Run the full backend gate** + +```bash +cd backend && pytest -m "not e2e" -q +``` + +Expected: 157 passed (150 prior + 7 new) — or close to it. If anything else breaks, fix before continuing. 
+ +- [ ] **Step 6: Commit** + +```bash +git add backend/app/ml/frame_relight.py backend/tests/unit/test_frame_relight.py +git commit -m "feat(ai-restyle): ml/frame_relight Nano Banana wrapper + safety constraints" +``` + +--- + +## Phase 2 — Backend ML: video restyle + pipeline (~1.5 days) + +### Task 2.1: Video-to-video restyle module + +**Files:** +- Create: `backend/app/ml/video_restyle.py` +- Test: `backend/tests/unit/test_video_restyle.py` +- Reference pattern: `backend/app/saas/pipeline.py:generate_talking_head` (existing fal.ai integration) +- Depends on: Phase 0 spike — replace the `MODEL_ID` constant and the `arguments` payload keys below with the values from the spike doc. + +- [ ] **Step 1: Write the failing test** + +```python +# backend/tests/unit/test_video_restyle.py +"""Tests for ml/video_restyle: fal.ai video-to-video call. + +fal_client is mocked. We assert the model id, payload keys, and that the +output file is written from the returned URL. +""" +from __future__ import annotations + +import io +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from app.ml.video_restyle import MODEL_ID, restyle_video + + +def test_restyle_video_calls_chosen_model(tmp_path): + video = tmp_path / "in.mp4" + video.write_bytes(b"fake video") + ref = tmp_path / "ref.png" + ref.write_bytes(b"fake png") + out = tmp_path / "out.mp4" + + fake_run = MagicMock(return_value={"video": {"url": "https://fake.fal.ai/out.mp4"}}) + fake_upload = MagicMock(side_effect=["https://fake/video.mp4", "https://fake/ref.png"]) + + fake_resp = MagicMock() + fake_resp.iter_bytes = MagicMock(return_value=iter([b"video-bytes"])) + fake_resp.raise_for_status = MagicMock() + fake_httpx = MagicMock() + # `with httpx.Client(...) as client` binds __enter__'s return value, so make it yield this same mock + fake_httpx.__enter__.return_value = fake_httpx + fake_httpx.stream.return_value.__enter__.return_value = fake_resp + + with patch("app.ml.video_restyle.fal_client.subscribe", fake_run), \ + patch("app.ml.video_restyle.fal_client.upload_file", fake_upload), \ + patch("app.ml.video_restyle.httpx.Client", 
return_value=fake_httpx): + result = restyle_video( + api_key="fake", + video_path=str(video), + reference_frame_path=str(ref), + out_path=str(out), + ) + + assert result == str(out) + assert out.exists() + assert out.read_bytes() == b"video-bytes" + fake_run.assert_called_once() + assert fake_run.call_args.args[0] == MODEL_ID + + +def test_restyle_video_missing_inputs(tmp_path): + out = tmp_path / "out.mp4" + with pytest.raises(FileNotFoundError): + restyle_video( + api_key="x", + video_path=str(tmp_path / "missing.mp4"), + reference_frame_path=str(tmp_path / "missing.png"), + out_path=str(out), + ) +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +cd backend && pytest tests/unit/test_video_restyle.py -v +``` + +Expected: `ImportError: cannot import name 'MODEL_ID' from 'app.ml.video_restyle'`. + +- [ ] **Step 3: Implement video_restyle** + +```python +# backend/app/ml/video_restyle.py +"""fal.ai video-to-video restyle: send a source video + reference frame, +download the restyled output. + +Model and payload shape were chosen during Phase 0 spike. See +``docs/superpowers/specs/2026-05-20-ai-restyle-phase0-spike.md``. +""" +from __future__ import annotations + +import os + +import fal_client +import httpx + +# Set during Phase 0 spike. Replace if the spike picked a different model. +MODEL_ID = "fal-ai/wan/v2.5/turbo/video-to-video" + + +def restyle_video( + api_key: str, + video_path: str, + reference_frame_path: str, + out_path: str, +) -> str: + """Run fal.ai v2v with the source video and reference frame. Writes + the restyled MP4 to ``out_path`` and returns it. + + Raises ``FileNotFoundError`` if either input is missing. fal_client + raises its own errors for API / network failures — those propagate. 
+ """ + if not os.path.exists(video_path): + raise FileNotFoundError(f"Input video not found: {video_path}") + if not os.path.exists(reference_frame_path): + raise FileNotFoundError(f"Reference frame not found: {reference_frame_path}") + os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True) + + os.environ["FAL_KEY"] = api_key # fal_client reads this env var + + video_url = fal_client.upload_file(video_path) + ref_url = fal_client.upload_file(reference_frame_path) + + response = fal_client.subscribe( + MODEL_ID, + arguments={ + "video_url": video_url, + "image_url": ref_url, + "prompt": "Match the lighting and background of the reference image. Preserve all motion, subject, and camera angle from the source video.", + }, + with_logs=False, + ) + + out_url = response["video"]["url"] + with httpx.Client(timeout=300.0) as client: + with client.stream("GET", out_url) as r: + r.raise_for_status() + with open(out_path, "wb") as f: + for chunk in r.iter_bytes(): + f.write(chunk) + + return out_path +``` + +> ⚠️ **If Phase 0 picked a different model**, replace `MODEL_ID` and adjust the `arguments` keys (`video_url`, `image_url`, `prompt`) to match the model's documented input schema. Run the spike's recorded sample payload through this function manually to verify before continuing. + +- [ ] **Step 4: Run test to verify it passes** + +```bash +cd backend && pytest tests/unit/test_video_restyle.py -v +``` + +Expected: 2 passed. + +- [ ] **Step 5: Commit** + +```bash +git add backend/app/ml/video_restyle.py backend/tests/unit/test_video_restyle.py +git commit -m "feat(ai-restyle): ml/video_restyle fal.ai v2v wrapper" +``` + +--- + +### Task 2.2: Pipeline orchestrator + +**Files:** +- Create: `backend/app/saas/restyle_pipeline.py` +- Test: `backend/tests/unit/test_restyle_pipeline.py` +- Reference pattern: `backend/app/editing/auto_pipeline.py` (Short-form's auto-pipeline) — orchestrates multiple ML modules and writes progress to a job dict. 
+ +- [ ] **Step 1: Write the failing test** + +```python +# backend/tests/unit/test_restyle_pipeline.py +"""Tests for saas/restyle_pipeline: the 7-step orchestrator. + +Each ML module is mocked. We verify the pipeline: +- Calls the steps in order +- Writes status='processing' → 'completed' to the supplied jobs dict +- Captures the result video_url + duration +- Marks status='failed' if any step raises +""" +from __future__ import annotations + +import asyncio +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from app.saas.restyle_pipeline import run_restyle_job + + +@pytest.fixture +def fake_jobs(tmp_path): + """A jobs dict primed with an in-progress entry, plus a fake input file.""" + job_id = "test-restyle-job" + input_file = tmp_path / "in.mp4" + input_file.write_bytes(b"\x00\x00\x00\x18ftypisom" + b"\x00" * 200) + return { + job_id: { + "status": "processing", + "logs": [], + "result": None, + "progress_pct": 0, + }, + }, job_id, input_file + + +def test_pipeline_happy_path(tmp_path, fake_jobs, monkeypatch): + jobs, job_id, input_file = fake_jobs + output_dir = tmp_path / "output" / job_id + output_dir.mkdir(parents=True) + + monkeypatch.setattr("app.saas.restyle_pipeline.OUTPUT_DIR", str(tmp_path / "output")) + + with patch("app.saas.restyle_pipeline.probe_duration", return_value=12.0), \ + patch("app.saas.restyle_pipeline.extract_first_frame") as fake_extract, \ + patch("app.saas.restyle_pipeline.relight_frame") as fake_relight, \ + patch("app.saas.restyle_pipeline.restyle_video") as fake_v2v, \ + patch("app.saas.restyle_pipeline.mux_video_audio") as fake_mux: + # Each step "writes" its output by returning the path it was given + fake_extract.side_effect = lambda src, dst: dst + fake_relight.side_effect = lambda **kw: kw["out_path"] + fake_v2v.side_effect = lambda **kw: kw["out_path"] + fake_mux.side_effect = lambda video, audio_src, out: out + + asyncio.run(run_restyle_job( + jobs=jobs, + job_id=job_id, + 
input_path=str(input_file), + background_prompt="bahamas", + lighting_prompt="golden", + gemini_key="g", + fal_key="f", + )) + + job = jobs[job_id] + assert job["status"] == "completed" + assert job["result"]["video_url"].endswith(".mp4") + assert job["result"]["duration_sec"] == 12.0 + assert job["progress_pct"] == 100 + fake_extract.assert_called_once() + fake_relight.assert_called_once() + fake_v2v.assert_called_once() + fake_mux.assert_called_once() + + +def test_pipeline_marks_failed_when_relight_raises(tmp_path, fake_jobs, monkeypatch): + jobs, job_id, input_file = fake_jobs + monkeypatch.setattr("app.saas.restyle_pipeline.OUTPUT_DIR", str(tmp_path / "output")) + + with patch("app.saas.restyle_pipeline.probe_duration", return_value=10.0), \ + patch("app.saas.restyle_pipeline.extract_first_frame", side_effect=lambda src, dst: dst), \ + patch("app.saas.restyle_pipeline.relight_frame", side_effect=RuntimeError("content policy")), \ + patch("app.saas.restyle_pipeline.restyle_video") as fake_v2v, \ + patch("app.saas.restyle_pipeline.mux_video_audio") as fake_mux: + asyncio.run(run_restyle_job( + jobs=jobs, + job_id=job_id, + input_path=str(input_file), + background_prompt="x", + lighting_prompt="y", + gemini_key="g", + fal_key="f", + )) + + job = jobs[job_id] + assert job["status"] == "failed" + assert any("content policy" in line for line in job["logs"]) + fake_v2v.assert_not_called() + fake_mux.assert_not_called() + + +def test_pipeline_rejects_videos_longer_than_30s(tmp_path, fake_jobs, monkeypatch): + jobs, job_id, input_file = fake_jobs + monkeypatch.setattr("app.saas.restyle_pipeline.OUTPUT_DIR", str(tmp_path / "output")) + + with patch("app.saas.restyle_pipeline.probe_duration", return_value=45.0): + asyncio.run(run_restyle_job( + jobs=jobs, + job_id=job_id, + input_path=str(input_file), + background_prompt="x", + lighting_prompt="y", + gemini_key="g", + fal_key="f", + )) + + job = jobs[job_id] + assert job["status"] == "failed" + assert any("30s" in line 
or "duration" in line.lower() for line in job["logs"]) +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +cd backend && pytest tests/unit/test_restyle_pipeline.py -v +``` + +Expected: `ImportError: cannot import name 'run_restyle_job'`. + +- [ ] **Step 3: Implement the orchestrator** + +```python +# backend/app/saas/restyle_pipeline.py +"""AI Restyle pipeline orchestrator. + +7-step pipeline: + 1. Validate (caller already enforces MIME + ftyp + size) + 2. Probe duration via ffprobe; reject if >30s + 3. Extract first frame to PNG + 4. Nano Banana relight of that frame + 5. fal.ai video-to-video with source + relit frame as reference + 6. Mux original audio back onto the restyled video + 7. Persist result_url to jobs dict; mark status=completed + +Any step's exception marks the job 'failed' with the exception message +appended to logs. The job dict is mutated in place so the route handler +and frontend poll see progress. +""" +from __future__ import annotations + +import asyncio +import os +from functools import partial +from typing import Any, Dict + +from app.ml.frame_extract import extract_first_frame +from app.ml.frame_relight import relight_frame +from app.ml.video_restyle import restyle_video +from app.video.ffmpeg import probe_duration, mux_video_audio + +OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "output") +MAX_DURATION_SEC = 30.0 + + +def _log(jobs: Dict[str, Any], job_id: str, line: str, pct: int | None = None) -> None: + job = jobs.get(job_id) + if job is None: + return + job["logs"].append(line) + if pct is not None: + job["progress_pct"] = pct + + +async def run_restyle_job( + jobs: Dict[str, Any], + job_id: str, + input_path: str, + background_prompt: str, + lighting_prompt: str, + gemini_key: str, + fal_key: str, +) -> None: + """Run the full restyle pipeline for ``job_id``. 
Mutates ``jobs[job_id]`` in place.""" + output_dir = os.path.join(OUTPUT_DIR, job_id) + os.makedirs(output_dir, exist_ok=True) + base = os.path.splitext(os.path.basename(input_path))[0] + + loop = asyncio.get_event_loop() + + try: + # Step 2 — duration probe + _log(jobs, job_id, "🔎 Probing video duration…", pct=5) + duration = await loop.run_in_executor(None, partial(probe_duration, input_path)) + if duration > MAX_DURATION_SEC: + raise ValueError(f"Video duration {duration:.1f}s exceeds 30s cap for AI Restyle v1") + + # Step 3 — extract first frame + _log(jobs, job_id, "🎞️ Extracting first frame…", pct=10) + frame_path = os.path.join(output_dir, f"{base}_frame.png") + await loop.run_in_executor(None, partial(extract_first_frame, input_path, frame_path)) + + # Step 4 — Nano Banana relight + _log(jobs, job_id, "🪄 Relighting frame with Nano Banana…", pct=20) + relit_path = os.path.join(output_dir, f"{base}_relit.png") + await loop.run_in_executor( + None, + partial( + relight_frame, + api_key=gemini_key, + frame_path=frame_path, + background_prompt=background_prompt, + lighting_prompt=lighting_prompt, + out_path=relit_path, + ), + ) + _log(jobs, job_id, "💰 Nano Banana relight: ~$0.039 per call", pct=30) + + # Step 5 — fal.ai video-to-video + _log(jobs, job_id, "🎬 Restyling video via fal.ai (~30-90s)…", pct=40) + restyled_noaudio = os.path.join(output_dir, f"{base}_restyled_noaudio.mp4") + await loop.run_in_executor( + None, + partial( + restyle_video, + api_key=fal_key, + video_path=input_path, + reference_frame_path=relit_path, + out_path=restyled_noaudio, + ), + ) + cost_est = round(duration * 0.04, 2) + _log(jobs, job_id, f"💰 fal.ai v2v: ~${cost_est:.2f} ({duration:.1f}s × $0.04/s)", pct=85) + + # Step 6 — mux original audio + _log(jobs, job_id, "🔊 Muxing original audio back…", pct=90) + final_out = os.path.join(output_dir, f"restyled_{os.path.basename(input_path)}") + await loop.run_in_executor( + None, + partial(mux_video_audio, restyled_noaudio, input_path, 
final_out), + ) + + # Step 7 — persist result + jobs[job_id]["result"] = { + "video_url": f"/videos/{job_id}/{os.path.basename(final_out)}", + "original_url": f"/videos/{job_id}/{os.path.basename(input_path)}", + "duration_sec": duration, + } + jobs[job_id]["status"] = "completed" + jobs[job_id]["progress_pct"] = 100 + _log(jobs, job_id, "✅ AI Restyle complete.") + + except Exception as exc: + jobs[job_id]["status"] = "failed" + jobs[job_id]["logs"].append(f"❌ {exc}") +``` + +- [ ] **Step 4: Run test to verify it passes** + +```bash +cd backend && pytest tests/unit/test_restyle_pipeline.py -v +``` + +Expected: 3 passed. + +- [ ] **Step 5: Backend gate** + +```bash +cd backend && pytest -m "not e2e" -q +``` + +Expected: all green, ~160 tests. + +- [ ] **Step 6: Commit** + +```bash +git add backend/app/saas/restyle_pipeline.py backend/tests/unit/test_restyle_pipeline.py +git commit -m "feat(ai-restyle): saas/restyle_pipeline orchestrator" +``` + +--- + +## Phase 3 — Backend routes (~0.5 day) + +### Task 3.1: Routes scaffold + Pydantic + endpoints + +**Files:** +- Create: `backend/app/routes/__init__.py` (empty) +- Create: `backend/app/routes/ai_restyle.py` +- Modify: `backend/app/main.py` (one-line router registration + share the jobs dict) +- Update: `backend/tests/snapshots/baseline.openapi.json` (regen) +- Reference: `backend/app/main.py:1275` (existing color_grade route — same shape we want) + +- [ ] **Step 1: Create the routes package** + +```bash +echo '"""FastAPI routers, split out of main.py incrementally."""' > backend/app/routes/__init__.py +``` + +- [ ] **Step 2: Write the routes module** + +Inspect how `main.py` exposes the shared `jobs` dict — currently it's a module-level dict. The router needs access. Pattern: expose `jobs` via `app.state` or import directly from `app.main`. Import directly to match the existing `_resolve_clip_input` pattern. + +```python +# backend/app/routes/ai_restyle.py +"""AI Restyle FastAPI router. 
+ +Endpoints: +- POST /api/restyle start a restyle job +- GET /api/restyle/{job_id} poll status + +Job state is the same in-memory dict as the rest of main.py (jobs[]). +""" +from __future__ import annotations + +import asyncio +import os +import shutil +import uuid +from typing import Any, Dict, Optional + +from fastapi import APIRouter, BackgroundTasks, File, Form, HTTPException, Request, UploadFile +from pydantic import BaseModel, Field + +router = APIRouter() + +# Reuse the same OUTPUT_DIR / jobs dict / upload guard as main.py. +# Imports are deferred to avoid a circular import at module load time. + +MAX_DURATION_SEC = 30.0 +MAX_PROMPT_LEN = 500 + + +class RestyleStatus(BaseModel): + status: str + logs: list[str] + progress_pct: int = Field(default=0, ge=0, le=100) + result: Optional[dict] = None + + +@router.post("/api/restyle") +async def start_restyle( + request: Request, + background_tasks: BackgroundTasks, + file: UploadFile = File(...), + background_prompt: str = Form(...), + lighting_prompt: str = Form(...), +): + """Start a restyle job. 
Returns ``{job_id}`` immediately; poll + ``GET /api/restyle/{job_id}`` for status.""" + from app.main import jobs, _ensure_video_upload, OUTPUT_DIR, UPLOAD_DIR + from app.saas.restyle_pipeline import run_restyle_job + + if len(background_prompt) > MAX_PROMPT_LEN or len(lighting_prompt) > MAX_PROMPT_LEN: + raise HTTPException(status_code=413, detail=f"Prompt fragments must be ≤{MAX_PROMPT_LEN} chars each") + + gemini_key = request.headers.get("X-Gemini-Key") + fal_key = request.headers.get("X-Fal-Key") + if not gemini_key: + raise HTTPException(status_code=401, detail="X-Gemini-Key header required") + if not fal_key: + raise HTTPException(status_code=401, detail="X-Fal-Key header required") + + job_id = str(uuid.uuid4()) + output_dir = os.path.join(OUTPUT_DIR, job_id) + os.makedirs(output_dir, exist_ok=True) + + # Persist the upload (re-using main.py's guard for MIME+ftyp) + first_chunk = await file.read(4096) + _ensure_video_upload(file.filename, first_chunk) + # The filename is client-supplied: keep only its basename so it cannot escape output_dir + safe_name = os.path.basename(file.filename or "") or f"{job_id}.mp4" + input_path = os.path.join(output_dir, safe_name) + with open(input_path, "wb") as out: + out.write(first_chunk) + shutil.copyfileobj(file.file, out) + + jobs[job_id] = { + "status": "processing", + "logs": [f"📥 Received {os.path.basename(input_path)}"], + "progress_pct": 0, + "result": None, + "product": "ai-restyle", # tag so /api/status doesn't get confused + } + + # Pass the coroutine function itself: Starlette awaits async background tasks after the response is sent. + # (asyncio.create_task would be called in a worker thread with no running event loop and the job would never start.) + background_tasks.add_task( + run_restyle_job, + jobs=jobs, + job_id=job_id, + input_path=input_path, + background_prompt=background_prompt, + lighting_prompt=lighting_prompt, + gemini_key=gemini_key, + fal_key=fal_key, + ) + + return {"job_id": job_id} + + +@router.get("/api/restyle/{job_id}", response_model=RestyleStatus) +async def restyle_status(job_id: str): + from app.main import jobs + job = jobs.get(job_id) + if not job: + raise HTTPException(status_code=404, detail="Job not found") + return RestyleStatus( + status=job["status"], + logs=job.get("logs", []), + 
progress_pct=job.get("progress_pct", 0), + result=job.get("result"), + ) +``` + +- [ ] **Step 3: Write the route test** + +```python +# backend/tests/api/test_ai_restyle.py +"""Contract tests for /api/restyle and /api/restyle/{job_id}.""" +from __future__ import annotations + +import io +from pathlib import Path + +import pytest + + +@pytest.fixture +def restyle_client(tmp_path, monkeypatch): + (tmp_path / "uploads").mkdir(exist_ok=True) + (tmp_path / "output").mkdir(exist_ok=True) + monkeypatch.chdir(tmp_path) + from fastapi.testclient import TestClient + from app.main import app + return TestClient(app) + + +def _mp4_bytes() -> bytes: + """Minimum-viable MP4 header to pass _ensure_video_upload's ftyp check.""" + return b"\x00\x00\x00\x18ftypisom" + b"\x00" * 200 + + +def test_post_restyle_requires_gemini_key(restyle_client): + res = restyle_client.post( + "/api/restyle", + files={"file": ("clip.mp4", io.BytesIO(_mp4_bytes()), "video/mp4")}, + data={"background_prompt": "beach", "lighting_prompt": "golden"}, + headers={"X-Fal-Key": "fake"}, + ) + assert res.status_code == 401 + assert "Gemini" in res.json()["detail"] + + +def test_post_restyle_requires_fal_key(restyle_client): + res = restyle_client.post( + "/api/restyle", + files={"file": ("clip.mp4", io.BytesIO(_mp4_bytes()), "video/mp4")}, + data={"background_prompt": "beach", "lighting_prompt": "golden"}, + headers={"X-Gemini-Key": "fake"}, + ) + assert res.status_code == 401 + assert "Fal" in res.json()["detail"] + + +def test_post_restyle_rejects_long_prompt(restyle_client): + res = restyle_client.post( + "/api/restyle", + files={"file": ("clip.mp4", io.BytesIO(_mp4_bytes()), "video/mp4")}, + data={"background_prompt": "x" * 600, "lighting_prompt": "y"}, + headers={"X-Gemini-Key": "g", "X-Fal-Key": "f"}, + ) + assert res.status_code == 413 + + +def test_get_restyle_status_not_found(restyle_client): + res = restyle_client.get("/api/restyle/nonexistent") + assert res.status_code == 404 +``` + +- [ ] **Step 4: 
Run the new tests — expect failures** + +```bash +cd backend && pytest tests/api/test_ai_restyle.py -v +``` + +Expected: the three POST tests fail (the route returns 404 instead of 401/413) because the router isn't registered yet; `test_get_restyle_status_not_found` passes trivially for the same reason. There is no import error — the tests only import `app.main`. + +- [ ] **Step 5: Register the router in main.py** + +Find the existing FastAPI app object (`app = FastAPI(...)` near the top of `main.py`). After it's instantiated, add: + +```python +# main.py — after app = FastAPI(...) +from app.routes.ai_restyle import router as ai_restyle_router +app.include_router(ai_restyle_router) +``` + +- [ ] **Step 6: Run the route tests** + +```bash +cd backend && pytest tests/api/test_ai_restyle.py -v +``` + +Expected: 4 passed. + +- [ ] **Step 7: Regenerate the OpenAPI snapshot** + +```bash +cd backend && rm tests/snapshots/baseline.openapi.json && pytest tests/api/test_openapi_contract.py -v +``` + +Expected: the contract test re-baselines and passes. Inspect the diff: + +```bash +git diff backend/tests/snapshots/baseline.openapi.json | grep -E '"/api/restyle' || echo "WARNING: new routes not in baseline" +``` + +Expected: matches show `"/api/restyle"` and `"/api/restyle/{job_id}"` paths are added. + +- [ ] **Step 8: Backend gate** + +```bash +cd backend && pytest -m "not e2e" -q +``` + +Expected: ~167 tests, all green. + +- [ ] **Step 9: Commit** + +```bash +git add backend/app/routes/ backend/app/main.py backend/tests/api/test_ai_restyle.py backend/tests/snapshots/baseline.openapi.json +git commit -m "feat(ai-restyle): /api/restyle + /api/restyle/{job_id} routes" +``` + +--- + +## Phase 4 — Frontend (~2 days, split across 4a + 4b) + +### Task 4a.1: Preset store (shared dependency for wizard + Settings tab) + +**Files:** +- Create: `frontend/src/state/aiRestylePresets.js` +- Reference pattern: `frontend/src/state/keysStore.js` (event-broadcasting localStorage store) + +- [ ] **Step 1: Implement the preset store** + +```javascript +// frontend/src/state/aiRestylePresets.js +// AI Restyle preset store. 
Two dimensions (backgrounds + lightings), each a +// list of { id, label, prompt } records with one marked as default via +// `defaultBackgroundId` / `defaultLightingId`. Persisted to localStorage and +// broadcast via a custom event so any subscribed component re-renders. +// +// Mirrors the keysStore.js + brandKit.js pattern. Seeded with 5 hand-tuned +// presets per dimension on first load. + +import { useEffect, useState } from 'react'; + +const STORAGE_KEY = 'openshorts.aiRestyle.presets'; +const EVENT = 'openshorts:ai-restyle-presets-changed'; + +const SEED = { + backgrounds: [ + { id: 'studio-white', label: 'Studio white', prompt: 'clean white seamless backdrop, minimalist photo studio, no clutter, perfect color separation' }, + { id: 'sunlit-office', label: 'Sunlit office', prompt: 'bright modern office interior with floor-to-ceiling windows, soft natural light, plants, wooden desk' }, + { id: 'bahamas-beach', label: 'Bahamas beach', prompt: 'tropical beach with palm trees, turquoise ocean water in the distance, soft white sand' }, + { id: 'cyberpunk-neon', label: 'Cyberpunk neon', prompt: 'nighttime city street with vivid neon signs, pink-and-cyan color palette, light fog' }, + { id: 'cinematic-forest', label: 'Cinematic forest', prompt: 'deep forest with dappled sunlight through tall pine trees, mossy ground, atmospheric haze' }, + ], + lightings: [ + { id: 'studio-softbox', label: 'Studio softbox', prompt: 'soft diffused studio softbox lighting from camera-left, gentle fill on the right, no harsh shadows' }, + { id: 'sunlit-office', label: 'Sunlit office', prompt: 'bright daylight pouring through large windows, soft fill on subject\'s face' }, + { id: 'golden-hour', label: 'Golden hour', prompt: 'warm golden-hour sun low and to the side, long shadows, amber and rose tones' }, + { id: 'cinematic-moody', label: 'Cinematic moody', prompt: 'low-key cinematic lighting with strong directional key, deep shadows, single soft fill' }, + { id: 'neon-nighttime', 
label: 'Neon nighttime', prompt: 'colored neon spill lighting (pink and cyan accents), low ambient, subject lit from multiple sides' }, + ], + defaultBackgroundId: 'studio-white', + defaultLightingId: 'studio-softbox', +}; + +function read() { + try { + const raw = localStorage.getItem(STORAGE_KEY); + if (!raw) return seedOnce(); + const data = JSON.parse(raw); + if (!data.backgrounds || !data.lightings) return seedOnce(); + return data; + } catch { + return seedOnce(); + } +} + +function seedOnce() { + try { localStorage.setItem(STORAGE_KEY, JSON.stringify(SEED)); } catch {/* ignore */} + return SEED; +} + +function write(next) { + try { localStorage.setItem(STORAGE_KEY, JSON.stringify(next)); } catch {/* ignore */} + window.dispatchEvent(new CustomEvent(EVENT, { detail: next })); +} + +export function getPresets() { return read(); } + +export function setDefault(dimension, id) { + const cur = read(); + const key = dimension === 'background' ? 'defaultBackgroundId' : 'defaultLightingId'; + write({ ...cur, [key]: id }); +} + +export function upsertPreset(dimension, preset) { + const cur = read(); + const list = dimension === 'background' ? cur.backgrounds : cur.lightings; + const next = list.some(p => p.id === preset.id) + ? list.map(p => p.id === preset.id ? preset : p) + : [...list, preset]; + const dimKey = dimension === 'background' ? 'backgrounds' : 'lightings'; + write({ ...cur, [dimKey]: next }); +} + +export function deletePreset(dimension, id) { + const cur = read(); + const list = dimension === 'background' ? cur.backgrounds : cur.lightings; + const defaultKey = dimension === 'background' ? 'defaultBackgroundId' : 'defaultLightingId'; + if (cur[defaultKey] === id) return; // can't delete the default + const next = list.filter(p => p.id !== id); + const dimKey = dimension === 'background' ? 
'backgrounds' : 'lightings'; + write({ ...cur, [dimKey]: next }); +} + +export function useAIRestylePresets() { + const [state, setState] = useState(() => read()); + useEffect(() => { + const onChange = (e) => setState(e.detail || read()); + window.addEventListener(EVENT, onChange); + return () => window.removeEventListener(EVENT, onChange); + }, []); + return state; +} +``` + +- [ ] **Step 2: Verify build** + +```bash +cd frontend && npm run build +``` + +Expected: 0 errors. (This repo has no unit tests for frontend stores — same as `keysStore.js`, which is also untested. The store is smoke-tested in the browser via the wizard in Phase 6.) + +- [ ] **Step 3: Commit** + +```bash +git add frontend/src/state/aiRestylePresets.js +git commit -m "feat(ai-restyle): preset store (localStorage + event broadcast)" +``` + +--- + +### Task 4b.1: AIRestyle pages folder + Wizard.jsx + +**Files:** +- Create: `frontend/src/pages/AIRestyle/index.jsx` +- Create: `frontend/src/pages/AIRestyle/Wizard.jsx` +- Create: `frontend/src/pages/AIRestyle/History.jsx` (read-only stub — past job list, deferred to follow-up) +- Reference pattern: `frontend/src/pages/ShortForm/index.jsx` + `Wizard.jsx` + +- [ ] **Step 1: Implement index.jsx (routing wrapper)** + +```jsx +// frontend/src/pages/AIRestyle/index.jsx +// AI Restyle page — wizard + history tabs. Mirrors pages/ShortForm/index.jsx.
+import { NavLink, Route, Routes } from 'react-router-dom'; +import Wizard from './Wizard.jsx'; +import History from './History.jsx'; + +export default function AIRestyle() { + return ( + <div className="h-full flex flex-col"> + <div className="px-6 pt-3 pb-2 flex items-center gap-4 border-b border-border bg-background shrink-0"> + <h1 className="text-[18px] font-semibold">AI Restyle</h1> + <nav className="flex items-center gap-2 ml-4"> + <NavLink + to="" + end + className={({ isActive }) => + `px-3 py-1.5 rounded-md text-[12px] ${isActive ? 'bg-white/10 text-white' : 'text-zinc-400 hover:text-white'}` + } + > + Wizard + </NavLink> + <NavLink + to="history" + className={({ isActive }) => + `px-3 py-1.5 rounded-md text-[12px] ${isActive ? 'bg-white/10 text-white' : 'text-zinc-400 hover:text-white'}` + } + > + History + </NavLink> + </nav> + </div> + <div className="flex-1 overflow-hidden"> + <Routes> + <Route index element={<Wizard />} /> + <Route path="history" element={<History />} /> + </Routes> + </div> + </div> + ); +} +``` + +- [ ] **Step 2: Implement Wizard.jsx (3-step wrapper)** + +```jsx +// frontend/src/pages/AIRestyle/Wizard.jsx +// 3-step AI Restyle wizard. Same useWizard pattern as ShortForm/Wizard.jsx. 
+import { Check } from 'lucide-react'; +import { useWizard } from '../../hooks/useWizard.js'; +import Upload from './steps/Upload.jsx'; +import Configure from './steps/Configure.jsx'; +import Review from './steps/Review.jsx'; + +const STEPS = [ + { id: 'upload', label: 'Upload' }, + { id: 'configure', label: 'Configure' }, + { id: 'review', label: 'Review', lock: false }, +]; + +const INITIAL = { + file: null, + selection: { + backgroundPresetId: null, + lightingPresetId: null, + backgroundPromptOverride: null, + lightingPromptOverride: null, + }, + job: null, +}; + +const STORAGE_KEY = 'openshorts.aiRestyle.wizard'; + +function needsFreshUpload(data) { + return data?.file && !(data.file.file instanceof File); +} + +export default function Wizard() { + const w = useWizard({ + steps: STEPS, + initialData: INITIAL, + storageKey: STORAGE_KEY, + resetOnRehydrate: needsFreshUpload, + }); + + return ( + <div className="h-full flex flex-col"> + <StepIndicator wizard={w} /> + <div className="flex-1 overflow-hidden"> + {w.currentStep.id === 'upload' && <Upload wizard={w} />} + {w.currentStep.id === 'configure' && <Configure wizard={w} />} + {w.currentStep.id === 'review' && <Review wizard={w} />} + </div> + </div> + ); +} + +function StepIndicator({ wizard }) { + return ( + <div className="px-6 py-4 border-b border-border bg-background shrink-0"> + <div className="flex items-center gap-3"> + {wizard.steps.map((s, i) => { + const active = i === wizard.step; + const done = i < wizard.step; + const reachable = i <= wizard.step; + return ( + <div key={s.id} className="flex items-center gap-3 flex-1"> + <button + onClick={() => reachable && wizard.goto(i)} + disabled={!reachable} + className={`flex items-center gap-2 disabled:cursor-not-allowed ${ + active ? 'text-white' : done ? 'text-zinc-300' : 'text-zinc-600' + }`} + > + <span className={`w-6 h-6 flex items-center justify-center rounded-full text-[11px] font-medium ${ + active ? 'bg-primary text-white' : + done ? 
'bg-success/20 text-success border border-success/40' : + 'bg-white/5 text-zinc-500 border border-border' + }`}> + {done ? <Check size={12} /> : i + 1} + </span> + <span className="text-[12px]">{s.label}</span> + </button> + {i < wizard.steps.length - 1 && ( + <div className={`flex-1 h-px ${done ? 'bg-success/40' : 'bg-border'}`} /> + )} + </div> + ); + })} + </div> + </div> + ); +} +``` + +- [ ] **Step 3: Implement History.jsx (placeholder)** + +```jsx +// frontend/src/pages/AIRestyle/History.jsx +export default function History() { + return ( + <div className="h-full flex items-center justify-center text-zinc-500 text-[12px]"> + Past AI Restyle jobs will appear here. (Tracking lands in a follow-up.) + </div> + ); +} +``` + +- [ ] **Step 4: Commit (pre-step files)** + +```bash +git add frontend/src/pages/AIRestyle/ +git commit -m "feat(ai-restyle): scaffold AIRestyle page + Wizard.jsx (no steps yet)" +``` + +--- + +### Task 4b.2: Upload step + +**Files:** +- Create: `frontend/src/pages/AIRestyle/steps/Upload.jsx` +- Reference pattern: `frontend/src/pages/ShortForm/steps/Upload.jsx` + +- [ ] **Step 1: Implement Upload step** + +```jsx +// frontend/src/pages/AIRestyle/steps/Upload.jsx +// AI Restyle Upload step. Single file, MP4/MOV, ≤30s. Probes duration on +// client (HTMLVideoElement) before allowing Continue. 
+import { useRef, useState } from 'react'; +import { Upload as UploadIcon, X } from 'lucide-react'; + +const MAX_SEC = 30; +const ACCEPT = 'video/mp4,video/quicktime,.mp4,.mov'; + +async function probeDuration(file) { + return new Promise((resolve) => { + const url = URL.createObjectURL(file); + const v = document.createElement('video'); + v.preload = 'metadata'; + v.onloadedmetadata = () => { URL.revokeObjectURL(url); resolve(v.duration); }; + v.onerror = () => { URL.revokeObjectURL(url); resolve(null); }; + v.src = url; + }); +} + +export default function Upload({ wizard }) { + const inputRef = useRef(null); + const [error, setError] = useState(null); + const data = wizard.data.file; + + async function onChange(e) { + const f = e.target.files?.[0]; + if (!f) return; + setError(null); + + const ext = f.name.toLowerCase().match(/\.(mp4|mov)$/); + if (!ext) { setError('File must be MP4 or MOV.'); return; } + const dur = await probeDuration(f); + if (dur == null) { setError('Could not read video duration.'); return; } + if (dur > MAX_SEC) { + setError(`AI Restyle v1 caps at 30s. Your file is ${dur.toFixed(1)}s. Trim it first or use Short-form.`); + return; + } + + wizard.setData({ file: { id: `${Date.now()}-${Math.random().toString(36).slice(2, 7)}`, name: f.name, size: f.size, durationSec: dur, file: f } }); + } + + function clearFile() { + wizard.setData({ file: null }); + if (inputRef.current) inputRef.current.value = ''; + } + + return ( + <div className="h-full overflow-y-auto custom-scrollbar p-8"> + <div className="max-w-2xl mx-auto"> + <h1 className="text-[24px] font-semibold mb-2">Upload a video</h1> + <p className="text-[13px] text-zinc-400 mb-6"> + MP4 or MOV, up to 30 seconds. We'll relight the lighting and replace + the background while keeping your motion and audio. + </p> + + {!data ? 
( + <button + onClick={() => inputRef.current?.click()} + className="w-full border-2 border-dashed border-border rounded-lg p-12 flex flex-col items-center gap-3 hover:bg-white/5 transition" + > + <UploadIcon size={24} className="text-zinc-500" /> + <div className="text-[13px] text-zinc-300">Drop a video here or click to browse</div> + <div className="text-[11px] text-zinc-500">MP4 / MOV · ≤30 seconds · ≤2 GB</div> + </button> + ) : ( + <div className="rounded-lg border border-border bg-surface p-4 flex items-center justify-between"> + <div> + <div className="text-[13px] text-white font-medium truncate">{data.name}</div> + <div className="text-[11px] text-zinc-500 mt-0.5"> + {(data.size / 1024 / 1024).toFixed(1)} MB · {data.durationSec.toFixed(1)}s + </div> + </div> + <button onClick={clearFile} className="p-1.5 hover:bg-white/10 rounded text-zinc-400" aria-label="Remove"> + <X size={14} /> + </button> + </div> + )} + + <input ref={inputRef} type="file" accept={ACCEPT} onChange={onChange} className="hidden" /> + + {error && ( + <div className="mt-3 text-[12px] text-red-400" role="alert">{error}</div> + )} + + <div className="mt-6 flex justify-end"> + <button + onClick={wizard.next} + disabled={!data} + className="btn-primary px-4 py-2 text-[13px] disabled:opacity-40" + > + Continue → + </button> + </div> + </div> + </div> + ); +} +``` + +- [ ] **Step 2: Verify build** + +```bash +cd frontend && npm run build +``` + +Expected: 0 errors. + +- [ ] **Step 3: Commit** + +```bash +git add frontend/src/pages/AIRestyle/steps/Upload.jsx +git commit -m "feat(ai-restyle): wizard Upload step (client-side duration probe)" +``` + +--- + +### Task 4b.3: Configure step + +**Files:** +- Create: `frontend/src/pages/AIRestyle/steps/Configure.jsx` + +- [ ] **Step 1: Implement Configure step** + +```jsx +// frontend/src/pages/AIRestyle/steps/Configure.jsx +// Pick a Background preset + a Lighting preset. 
The effective prompt +// (background prompt on the first line, lighting prompt on the lines after it) +// is shown in an editable textarea — editing overrides for this job only. POSTs /api/restyle on submit. +import { useEffect, useMemo, useState } from 'react'; +import { useAIRestylePresets } from '../../../state/aiRestylePresets.js'; +import { useKeys } from '../../../state/keysStore.js'; +import { getApiUrl } from '../../../config'; + +export default function Configure({ wizard }) { + const presets = useAIRestylePresets(); + const keys = useKeys(); + const sel = wizard.data.selection; + + // Initialize selection from defaults on first render + useEffect(() => { + if (sel.backgroundPresetId && sel.lightingPresetId) return; + wizard.setData({ + selection: { + ...sel, + backgroundPresetId: sel.backgroundPresetId || presets.defaultBackgroundId, + lightingPresetId: sel.lightingPresetId || presets.defaultLightingId, + }, + }); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [presets.defaultBackgroundId, presets.defaultLightingId]); + + const bgPreset = presets.backgrounds.find((p) => p.id === sel.backgroundPresetId); + const ltPreset = presets.lightings.find((p) => p.id === sel.lightingPresetId); + + const effectivePrompt = useMemo(() => { + const bg = sel.backgroundPromptOverride ?? bgPreset?.prompt ?? ''; + const lt = sel.lightingPromptOverride ?? ltPreset?.prompt ?? 
''; + return `${bg}\n${lt}`; + }, [sel, bgPreset, ltPreset]); + + const [submitting, setSubmitting] = useState(false); + const [error, setError] = useState(null); + + function setBg(id) { + wizard.setData({ selection: { ...sel, backgroundPresetId: id, backgroundPromptOverride: null } }); + } + function setLt(id) { + wizard.setData({ selection: { ...sel, lightingPresetId: id, lightingPromptOverride: null } }); + } + function setOverride(text) { + const [bg, ...rest] = text.split('\n'); + wizard.setData({ + selection: { ...sel, backgroundPromptOverride: bg, lightingPromptOverride: rest.join('\n') }, + }); + } + + async function start() { + setError(null); + if (!keys.gemini) { setError('Set your Gemini key in Settings first.'); return; } + if (!keys.fal) { setError('Set your fal.ai key in Settings first.'); return; } + + const fd = new FormData(); + fd.append('file', wizard.data.file.file); + const [bgLine, ...rest] = effectivePrompt.split('\n'); + fd.append('background_prompt', bgLine.slice(0, 500)); + fd.append('lighting_prompt', rest.join('\n').slice(0, 500)); + + setSubmitting(true); + try { + const res = await fetch(getApiUrl('/api/restyle'), { + method: 'POST', + headers: { 'X-Gemini-Key': keys.gemini, 'X-Fal-Key': keys.fal }, + body: fd, + }); + if (!res.ok) throw new Error(await res.text()); + const { job_id } = await res.json(); + wizard.setData({ job: { jobId: job_id, status: 'processing', result: null, progressPct: 0, logs: [] } }); + wizard.next(); + } catch (e) { + setError(String(e.message || e)); + } finally { + setSubmitting(false); + } + } + + return ( + <div className="h-full overflow-y-auto custom-scrollbar p-8"> + <div className="max-w-2xl mx-auto"> + <h1 className="text-[24px] font-semibold mb-2">Configure restyle</h1> + <p className="text-[13px] text-zinc-400 mb-6"> + Pick a background and lighting preset. Tweak the prompt below if you want. 
+ </p> + + <div className="grid grid-cols-2 gap-4 mb-4"> + <PresetSelect + label="Background" + value={sel.backgroundPresetId || ''} + onChange={setBg} + options={presets.backgrounds} + defaultId={presets.defaultBackgroundId} + /> + <PresetSelect + label="Lighting" + value={sel.lightingPresetId || ''} + onChange={setLt} + options={presets.lightings} + defaultId={presets.defaultLightingId} + /> + </div> + + <label className="block text-[11px] uppercase tracking-wider text-zinc-500 mb-2"> + Effective prompt (editable for this job) + </label> + <textarea + value={effectivePrompt} + onChange={(e) => setOverride(e.target.value)} + rows={5} + className="w-full bg-surface border border-border rounded-md p-3 text-[12px] text-zinc-200 font-mono leading-relaxed" + /> + + {error && <div className="mt-3 text-[12px] text-red-400" role="alert">{error}</div>} + + <div className="mt-6 flex justify-between"> + <button onClick={wizard.back} className="px-4 py-2 text-[13px] text-zinc-400 hover:text-white">← Back</button> + <button + onClick={start} + disabled={submitting} + className="btn-primary px-4 py-2 text-[13px] disabled:opacity-50" + > + {submitting ? 'Starting…' : 'Start restyle →'} + </button> + </div> + </div> + </div> + ); +} + +function PresetSelect({ label, value, onChange, options, defaultId }) { + return ( + <div> + <label className="block text-[11px] uppercase tracking-wider text-zinc-500 mb-2">{label}</label> + <select + value={value} + onChange={(e) => onChange(e.target.value)} + className="w-full bg-surface border border-border rounded-md px-3 py-2 text-[13px] text-zinc-200" + > + {options.map((p) => ( + <option key={p.id} value={p.id}> + {p.label}{p.id === defaultId ? ' ★' : ''} + </option> + ))} + </select> + </div> + ); +} +``` + +- [ ] **Step 2: Verify build** + +```bash +cd frontend && npm run build +``` + +Expected: 0 errors. 
+ +- [ ] **Step 3: Commit** + +```bash +git add frontend/src/pages/AIRestyle/steps/Configure.jsx +git commit -m "feat(ai-restyle): wizard Configure step (preset dropdowns + override)" +``` + +--- + +### Task 4b.4: Review step + +**Files:** +- Create: `frontend/src/pages/AIRestyle/steps/Review.jsx` + +- [ ] **Step 1: Implement Review step** + +```jsx +// frontend/src/pages/AIRestyle/steps/Review.jsx +// Polls /api/restyle/{job_id} until terminal. Shows progress bar + log tail +// during processing. On completion: Before/After preview + Download + Send +// to Short-form CTA. +import { useEffect, useState } from 'react'; +import { Download, Eye } from 'lucide-react'; +import { useNavigate } from 'react-router-dom'; +import PhoneFrame from '../../../components/ui/PhoneFrame.jsx'; +import { getApiUrl } from '../../../config'; + +export default function Review({ wizard }) { + const job = wizard.data.job; + const file = wizard.data.file; + const [showOriginal, setShowOriginal] = useState(false); + const [sourceUrl, setSourceUrl] = useState(null); + const navigate = useNavigate(); + + // Blob URL for the original (Before view) + useEffect(() => { + if (!file?.file) { setSourceUrl(null); return; } + const u = URL.createObjectURL(file.file); + setSourceUrl(u); + return () => URL.revokeObjectURL(u); + }, [file?.file]); + + // Poll status until terminal + useEffect(() => { + if (!job?.jobId || job.status === 'completed' || job.status === 'failed') return; + let alive = true; + const tick = async () => { + try { + const res = await fetch(getApiUrl(`/api/restyle/${job.jobId}`)); + if (!res.ok) throw new Error(`status ${res.status}`); + const data = await res.json(); + if (!alive) return; + wizard.setData({ job: { ...job, ...data, progressPct: data.progress_pct ?? job.progressPct } }); + } catch (e) { /* swallow transient */ } + }; + const i = setInterval(tick, 2000); + tick(); + return () => { alive = false; clearInterval(i); }; + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [job?.jobId, job?.status]); + + const 
url = job?.result?.video_url ? getApiUrl(job.result.video_url) : null; + const status = job?.status || 'idle'; + + function sendToShortForm() { + if (!url) return; + // Stash the restyled URL in sessionStorage so ShortForm picks it up. + sessionStorage.setItem('openshorts.shortForm.handoff', JSON.stringify({ url, name: `restyled-${file?.name || 'video.mp4'}` })); + navigate('/short-form'); + } + + if (status === 'processing') { + return ( + <div className="h-full flex items-center justify-center p-12"> + <div className="max-w-md w-full"> + <div className="text-[14px] text-white font-medium mb-3">Restyling…</div> + <div className="h-1.5 bg-white/10 rounded-full overflow-hidden"> + <div className="h-full bg-primary transition-all" style={{ width: `${job?.progressPct || 5}%` }} /> + </div> + <div className="mt-4 text-[11px] text-zinc-500 font-mono leading-relaxed max-h-40 overflow-y-auto"> + {(job?.logs || []).slice(-8).map((l, i) => <div key={i}>{l}</div>)} + </div> + </div> + </div> + ); + } + + if (status === 'failed') { + return ( + <div className="h-full flex items-center justify-center p-12 text-center"> + <div className="max-w-md"> + <div className="text-[14px] text-red-400 font-medium mb-2">Restyle failed</div> + <div className="text-[12px] text-zinc-500 font-mono whitespace-pre-line"> + {(job?.logs || []).slice(-6).join('\n')} + </div> + <div className="mt-4 flex gap-3 justify-center"> + <button onClick={wizard.back} className="px-3 py-1.5 text-[12px] border border-border rounded-md text-zinc-300 hover:bg-white/5">Try again</button> + <button onClick={wizard.reset} className="px-3 py-1.5 text-[12px] btn-primary">Start over</button> + </div> + </div> + </div> + ); + } + + return ( + <div className="h-full flex flex-col p-8"> + <div className="flex-1 flex flex-col items-center gap-4"> + <div className="flex items-center gap-2 text-[12px]"> + <button onClick={() => setShowOriginal(false)} className={`px-3 py-1.5 rounded-md ${!showOriginal ? 
'bg-white/10 text-white' : 'text-zinc-400 hover:text-white'}`}>After</button> + <button onClick={() => setShowOriginal(true)} disabled={!sourceUrl} className={`px-3 py-1.5 rounded-md disabled:opacity-30 ${showOriginal ? 'bg-white/10 text-white' : 'text-zinc-400 hover:text-white'}`}> + <Eye size={12} className="inline mr-1" /> Before + </button> + </div> + + <PhoneFrame size="md"> + {showOriginal && sourceUrl ? ( + <video key="src" src={sourceUrl} controls className="w-full h-full object-contain" /> + ) : url ? ( + <video key="rst" src={url} controls className="w-full h-full object-contain" /> + ) : ( + <div className="text-zinc-600 text-[12px] p-4 text-center">No preview available.</div> + )} + </PhoneFrame> + </div> + + <div className="border-t border-border pt-4 flex items-center gap-3"> + <a href={url || '#'} download className={`btn-primary px-3 py-2 text-[12px] flex items-center gap-2 ${!url ? 'opacity-40 pointer-events-none' : ''}`}> + <Download size={12} /> Download + </a> + <button onClick={sendToShortForm} disabled={!url} className="px-3 py-2 text-[12px] border border-primary/40 text-primary rounded-md hover:bg-primary/10 disabled:opacity-40"> + Send to Short-form → + </button> + <button onClick={wizard.reset} className="ml-auto px-3 py-2 text-[12px] text-zinc-400 hover:text-white"> + Start another + </button> + </div> + </div> + ); +} +``` + +- [ ] **Step 2: Verify build** + +```bash +cd frontend && npm run build +``` + +Expected: 0 errors. 
+ +- [ ] **Step 3: Commit** + +```bash +git add frontend/src/pages/AIRestyle/steps/Review.jsx +git commit -m "feat(ai-restyle): wizard Review step (poll + Before/After + Send-to-Short-form)" +``` + +--- + +### Task 4b.5: Sidebar entry + App route + +**Files:** +- Modify: `frontend/src/layouts/Sidebar.jsx` +- Modify: `frontend/src/App.jsx` + +- [ ] **Step 1: Add sidebar entry** + +Replace the existing `NAV` array in `frontend/src/layouts/Sidebar.jsx`: + +```jsx +import { LayoutDashboard, Smartphone, Video, Scissors, Settings as SettingsIcon, Wand2 } from 'lucide-react'; + +const NAV = [ + { to: '/dashboard', label: 'Dashboard', icon: LayoutDashboard }, + { to: '/long-form', label: 'Long-form', icon: Video }, + { to: '/ai-restyle', label: 'AI Restyle', icon: Wand2 }, + { to: '/short-form', label: 'Short-form', icon: Smartphone }, + { to: '/clip-generator', label: 'Clip Generator', icon: Scissors }, + { to: '/settings', label: 'Settings', icon: SettingsIcon }, +]; +``` + +- [ ] **Step 2: Wire the App route** + +Locate the existing `<Routes>` block in `frontend/src/App.jsx`. Add: + +```jsx +import AIRestyle from './pages/AIRestyle/index.jsx'; + +// inside <Routes>, after the LongForm route: +<Route path="/ai-restyle/*" element={<AIRestyle />} /> +``` + +- [ ] **Step 3: Verify build** + +```bash +cd frontend && npm run build +``` + +Expected: 0 errors. + +- [ ] **Step 4: Commit** + +```bash +git add frontend/src/layouts/Sidebar.jsx frontend/src/App.jsx +git commit -m "feat(ai-restyle): sidebar entry + /ai-restyle route" +``` + +--- + +## Phase 5 — Settings tab (~1 day) + +### Task 5.1: AIRestylePresetsSection.jsx + tab registration + +**Files:** +- Create: `frontend/src/pages/Settings/sections/AIRestylePresetsSection.jsx` +- Modify: `frontend/src/pages/Settings/index.jsx` — register the new section. 
+- Reference pattern: `frontend/src/pages/Settings/sections/BrandKitSection.jsx` + +- [ ] **Step 1: Read the existing Settings shape** + +```bash +cd "/Users/matissevansteenbergen/Downloads/AGENTIC WORKLFOWS/PERSONAL/Auto-shorts (TODO)/openshorts" && cat frontend/src/pages/Settings/index.jsx | head -40 +cat frontend/src/pages/Settings/sections/BrandKitSection.jsx | head -50 +``` + +Match the export shape and props convention (probably `({ ... })` with no specific contract). + +- [ ] **Step 2: Implement AIRestylePresetsSection** + +```jsx +// frontend/src/pages/Settings/sections/AIRestylePresetsSection.jsx +// Edit / delete / star presets for the AI Restyle wizard. +import { useState } from 'react'; +import { Star, Pencil, Trash2, Plus } from 'lucide-react'; +import SectionHeader from './SectionHeader.jsx'; +import { + useAIRestylePresets, + upsertPreset, + deletePreset, + setDefault, +} from '../../../state/aiRestylePresets.js'; + +export default function AIRestylePresetsSection() { + const presets = useAIRestylePresets(); + const [editing, setEditing] = useState(null); // { dimension, preset } | null + + return ( + <section className="space-y-6"> + <SectionHeader + title="AI Restyle presets" + description="Edit the prompts used to relight the first frame. Star marks the recommended default." 
+ /> + + <Dimension + title="Backgrounds" + items={presets.backgrounds} + defaultId={presets.defaultBackgroundId} + onEdit={(p) => setEditing({ dimension: 'background', preset: p })} + onAdd={() => setEditing({ dimension: 'background', preset: { id: '', label: '', prompt: '' } })} + onStar={(id) => setDefault('background', id)} + onDelete={(id) => deletePreset('background', id)} + /> + + <Dimension + title="Lightings" + items={presets.lightings} + defaultId={presets.defaultLightingId} + onEdit={(p) => setEditing({ dimension: 'lighting', preset: p })} + onAdd={() => setEditing({ dimension: 'lighting', preset: { id: '', label: '', prompt: '' } })} + onStar={(id) => setDefault('lighting', id)} + onDelete={(id) => deletePreset('lighting', id)} + /> + + {editing && ( + <EditModal + dimension={editing.dimension} + preset={editing.preset} + onClose={() => setEditing(null)} + onSave={(p) => { upsertPreset(editing.dimension, p); setEditing(null); }} + /> + )} + </section> + ); +} + +function Dimension({ title, items, defaultId, onEdit, onAdd, onStar, onDelete }) { + return ( + <div> + <h3 className="text-[13px] font-medium text-white mb-2">{title}</h3> + <div className="space-y-1 rounded-lg border border-border overflow-hidden"> + {items.map((p) => { + const isDefault = p.id === defaultId; + return ( + <div key={p.id} className="p-3 hover:bg-white/5 flex items-start gap-3"> + <button + onClick={() => onStar(p.id)} + title={isDefault ? 'Default' : 'Set as default'} + className={`mt-0.5 ${isDefault ? 'text-yellow-400' : 'text-zinc-600 hover:text-zinc-300'}`} + > + <Star size={14} fill={isDefault ? 
'currentColor' : 'none'} /> + </button> + <div className="flex-1 min-w-0"> + <div className="text-[13px] text-white font-medium">{p.label}</div> + <div className="text-[11px] text-zinc-500 mt-0.5 leading-snug">{p.prompt}</div> + </div> + <button onClick={() => onEdit(p)} className="p-1.5 text-zinc-500 hover:text-white" title="Edit"><Pencil size={12} /></button> + <button + onClick={() => onDelete(p.id)} + disabled={isDefault} + className="p-1.5 text-zinc-500 hover:text-red-400 disabled:opacity-30 disabled:cursor-not-allowed" + title={isDefault ? 'Cannot delete the default preset' : 'Delete'} + > + <Trash2 size={12} /> + </button> + </div> + ); + })} + <button onClick={onAdd} className="w-full p-3 text-[12px] text-zinc-400 hover:text-white hover:bg-white/5 border-t border-border flex items-center justify-center gap-2"> + <Plus size={12} /> Add {title.toLowerCase().slice(0, -1)} preset + </button> + </div> + </div> + ); +} + +function EditModal({ dimension, preset, onClose, onSave }) { + const [label, setLabel] = useState(preset.label || ''); + const [prompt, setPrompt] = useState(preset.prompt || ''); + + function save() { + if (!label.trim() || !prompt.trim()) return; + const id = preset.id || label.toLowerCase().replace(/\s+/g, '-').slice(0, 40); + onSave({ id, label: label.slice(0, 40), prompt: prompt.slice(0, 500) }); + } + + return ( + <div className="fixed inset-0 bg-black/60 flex items-center justify-center z-50" onClick={onClose}> + <div className="bg-surface border border-border rounded-lg p-5 w-full max-w-md" onClick={(e) => e.stopPropagation()}> + <h3 className="text-[14px] font-medium text-white mb-3"> + {preset.id ? 
'Edit' : 'Add'} {dimension} preset + </h3> + <label className="block text-[11px] text-zinc-500 uppercase mb-1">Name</label> + <input value={label} onChange={(e) => setLabel(e.target.value)} maxLength={40} + className="w-full bg-background border border-border rounded px-3 py-1.5 text-[13px] text-white mb-3" /> + <label className="block text-[11px] text-zinc-500 uppercase mb-1">Prompt</label> + <textarea value={prompt} onChange={(e) => setPrompt(e.target.value)} maxLength={500} rows={4} + className="w-full bg-background border border-border rounded px-3 py-2 text-[12px] text-zinc-200 font-mono" /> + <div className="text-[10px] text-zinc-500 text-right mt-1">{prompt.length}/500</div> + <div className="mt-4 flex justify-end gap-2"> + <button onClick={onClose} className="px-3 py-1.5 text-[12px] text-zinc-400 hover:text-white">Cancel</button> + <button onClick={save} disabled={!label.trim() || !prompt.trim()} className="btn-primary px-3 py-1.5 text-[12px] disabled:opacity-40">Save</button> + </div> + </div> + </div> + ); +} +``` + +- [ ] **Step 3: Register the section in Settings/index.jsx** + +Open `frontend/src/pages/Settings/index.jsx`. Add an import and one entry to whatever array/object defines the tab list (the existing pattern will be visible — match it exactly): + +```jsx +import AIRestylePresetsSection from './sections/AIRestylePresetsSection.jsx'; + +// inside the tab list (alongside BrandKit, ApiKeys, etc.): +{ id: 'ai-restyle', label: 'AI Restyle', component: AIRestylePresetsSection }, +``` + +- [ ] **Step 4: Verify build** + +```bash +cd frontend && npm run build +``` + +Expected: 0 errors. + +- [ ] **Step 5: Commit** + +```bash +git add frontend/src/pages/Settings/ +git commit -m "feat(ai-restyle): Settings tab with preset CRUD (star/edit/delete)" +``` + +--- + +## Phase 6 — Smoke test + Codex + ship (~0.5 day) + +### Task 6.1: Browser smoke test (chrome-devtools MCP) + +**No files.** Walk the user through these steps in a real browser. 
Per HANDOFF.md §6 rule 6 + Convention #5, UI features need browser verification, not just `npm run build`. + +- [ ] **Step 1: Restart backend container** + +```bash +docker restart openshorts-backend +``` + +- [ ] **Step 2: Open the app** + +Navigate the browser to `http://localhost:3001/#/ai-restyle`. Confirm: +- Sidebar shows "AI Restyle" between Long-form and Short-form with the Wand2 icon. +- Upload step renders with the drop zone. + +- [ ] **Step 3: Upload + duration check** + +Upload `demo-openshorts.mp4` (42s) → expect the rejection "AI Restyle v1 caps at 30s". Trim a copy to 10s and upload → expect Continue button enables. + +- [ ] **Step 4: Configure step** + +Verify two dropdowns render the 5 default backgrounds + 5 default lightings, each marked with ★ on the default. Pick "Bahamas beach" + "Golden hour". Effective prompt textarea updates. Type a custom override to confirm it doesn't blow away the saved preset. + +- [ ] **Step 5: Start the job** + +Click `Start restyle →`. Review step opens with the progress bar. Watch the logs cycle through: +- 🔎 Probing video duration +- 🎞️ Extracting first frame +- 🪄 Relighting frame with Nano Banana +- 💰 Nano Banana relight: ~$0.039 +- 🎬 Restyling video via fal.ai +- 💰 fal.ai v2v: ~$0.40 +- 🔊 Muxing original audio +- ✅ AI Restyle complete + +Total wall-clock should be ≤5 minutes. If it stalls past 10 minutes, kill the job, inspect `docker logs openshorts-backend`, and debug. Most common cause: wrong fal.ai model ID (revisit Phase 0 spike). + +- [ ] **Step 6: Verify output** + +After completion: +- Phone preview plays the restyled clip with new background/lighting. +- Before/After toggle works. +- Download button downloads the file. +- `Send to Short-form →` navigates to `/short-form` (initial sessionStorage handoff payload behavior — full integration with ShortForm's Upload step is documented as follow-up if not already present). + +- [ ] **Step 7: Settings tab smoke** + +Navigate to Settings → AI Restyle. 
Verify the two preset lists render. Click ★ on a non-default preset — the default marker should move to it. Click `Edit` on a preset, change the label, save, verify the wizard's dropdown updates without a reload (event broadcast works). Click `+ Add background preset`, create one with custom name + prompt, save, confirm it appears in the wizard dropdown. + +- [ ] **Step 8: Take screenshots for the commit** + +Save 3 screenshots to `.compact-ultra/`: +- `ai-restyle-upload.png` +- `ai-restyle-configure.png` +- `ai-restyle-review-completed.png` + +These are session-local — gitignored. + +--- + +### Task 6.2: Codex adversarial review + +Per global CLAUDE.md "Codex Adversarial Review" rule: this phase introduces new HTTP endpoints calling LLMs + external services. Codex must review. + +- [ ] **Step 1: Trigger Codex** + +```bash +/codex:rescue --background "deep security audit of AI Restyle (new sidebar product): /api/restyle + /api/restyle/{job_id} routes, frame_extract / frame_relight / video_restyle ML modules, restyle_pipeline orchestrator. Focus on: input validation (prompt length, duration cap), command injection via filenames, FFmpeg argument injection, fal.ai key leakage, prompt-injection of background_prompt / lighting_prompt into the Gemini call, race conditions on shared jobs[] dict, output file collisions, missing auth/rate-limit/timeout (acknowledged as opt-out per HANDOFF.md §5 but flag any NEW gaps not already covered by the existing opt-outs)." +``` + +- [ ] **Step 2: Address Codex findings** + +Triage each finding. For each: +- BLOCKER (auth bypass, RCE, secret leak): fix before merge. +- HIGH (input validation gap, injection vector): fix in this PR. +- MEDIUM (defense-in-depth, missing log): add a follow-up issue. +- LOW (style nit): ignore. + +Apply fixes via Edit and re-run pytest + npm run build.
+ +- [ ] **Step 3: Commit Codex fixes** + +```bash +git add -A +git commit -m "fix(ai-restyle): address Codex review findings (<short summary>)" +``` + +--- + +### Task 6.3: Final gates + roadmap update + +**Files:** +- Modify: `ROADMAP.md` — promote AI Restyle to "Stubbed in v1" +- Modify: `~/.claude/CLAUDE.md` (auto-managed sections — run the script) + +- [ ] **Step 1: Run backend + frontend gates** + +```bash +cd backend && pytest -m "not e2e" -q +cd ../frontend && npm run build +``` + +Expected: pytest fully green (~170 tests), build 0 errors. + +- [ ] **Step 2: Update ROADMAP.md** + +Add (or replace if already a placeholder) under a new "### AI Restyle" section: + +```markdown +### AI Restyle + +**Shipped** +- Sidebar entry between Long-form and Short-form (icon: Wand2). +- 3-step wizard: Upload → Configure → Review. +- Two preset dimensions (Background + Lighting), 5 hand-tuned seed presets each. +- Per-job prompt override via editable textarea. +- Settings tab with full preset CRUD (star/edit/delete). +- 30s duration cap (client + server enforced). +- Original audio preserved bit-for-bit. + +**Stubbed in v1** +- History tab is a placeholder ("Past AI Restyle jobs will appear here"). +- Send-to-Short-form CTA stashes a session payload; full wire-through into ShortForm's Upload step is follow-up work. + +**Later** +- Lift the 30s cap via chunked v2v with shared reference frame (Approach B from design). +- Bridge from Short-form Review's stage selector ("+ AI Restyle" stage). +- Auto-suggest preset based on the source frame (Gemini-driven). +- Backend-stored preset sharing / team marketplace. +``` + +- [ ] **Step 3: Regenerate CLAUDE.md auto-managed sections** + +```bash +python3 scripts/update_claude_md.py +``` + +Expected: the module-map table gains entries for `ml/frame_extract.py`, `ml/frame_relight.py`, `ml/video_restyle.py`, `saas/restyle_pipeline.py`, `routes/ai_restyle.py`. ENV table is unchanged (no new env vars). 
+ +- [ ] **Step 4: Final commit** + +```bash +git add ROADMAP.md ~/.claude/CLAUDE.md +git commit -m "docs(ai-restyle): roadmap entry + CLAUDE.md module-map refresh" +``` + +- [ ] **Step 5: Verify on the branch** + +```bash +git log --oneline | head -20 +git status +``` + +Expected: clean working tree, AI Restyle commits sit on top of the polish-plan commits, ready for PR. + +--- + +## Self-Review (post-write) + +**Spec coverage:** +- ✓ §1 goal — Phase 2 (pipeline) delivers. +- ✓ §2 user flow Upload — Task 4b.2. +- ✓ §2 user flow Configure — Task 4b.3. +- ✓ §2 user flow Review — Task 4b.4. +- ✓ §3 routes — Task 3.1. +- ✓ §3 pipeline orchestrator — Task 2.2. +- ✓ §3 ML modules (3) — Tasks 1.1, 1.2, 2.1. +- ✓ §4 frontend pages — Task 4b.1 onwards. +- ✓ §4 sidebar — Task 4b.5. +- ✓ §4 preset store — Task 4a.1. +- ✓ §4 Settings tab — Task 5.1. +- ✓ §4 seed presets — embedded in Task 4a.1. +- ✓ §5 security baseline C3 + C9 — covered in route Pydantic + cost log lines (Task 2.2 + 3.1). +- ✓ §5 cost telemetry — Task 2.2 log lines. +- ✓ §5 failure handling — Task 2.2 except block + Task 4b.4 failed-state UI. +- ✓ §5 tests — Tasks 1.1, 1.2, 2.1, 2.2, 3.1. +- ✓ §6 files — every added/modified file maps to a task. +- ✓ §7 milestones — Phases 0-6 mirror §7 exactly. +- ✓ §8 roadmap entry — Task 6.3 Step 2. +- ✓ §9 decisions — all referenced in code/tests where they apply. + +**Placeholder scan:** Phase 0 outputs (model ID, payload shape) are explicit dependencies of Phase 2 Task 2.1, with a clear ⚠️ callout. No "TBD" left in implementation steps. + +**Type consistency:** `relight_frame` signature matches between tests + impl + pipeline (api_key, frame_path, background_prompt, lighting_prompt, out_path). `restyle_video` signature matches (api_key, video_path, reference_frame_path, out_path). `run_restyle_job` signature matches (jobs, job_id, input_path, background_prompt, lighting_prompt, gemini_key, fal_key). 
Preset store API (`upsertPreset`, `deletePreset`, `setDefault`, `useAIRestylePresets`) used consistently across wizard + Settings. + +**Ambiguity:** No requirement in the spec is left to interpretation — Phase 0 picks the model, Phase 2 Task 2.1 has the contract for replacing the placeholder if Phase 0 picks differently. + +No issues found. Plan is ready for execution. + +--- + +*End of plan.* From c7e3db94a9b2bebcae1e306734a42b36c9001a3a Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Wed, 20 May 2026 14:59:35 -0400 Subject: [PATCH 37/43] =?UTF-8?q?feat(short-form):=20Phase=204=20=E2=80=94?= =?UTF-8?q?=20merge=20endpoint=20+=20checkbox/modal=20UI=20in=20Review?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Backend - New backend/app/video/merge.py: concat_clips() normalizes each input to 1080x1920@30fps + AAC 48 kHz stereo, then stitches with FFmpeg concat=v:a. All FFmpeg ops funnel through the wrapper per Convention #1. - POST /api/merge: bounds-checks clip_indices against the job's clip count, dedups while preserving user-picked order, allowlists transition ("cut"), rejects single-clip requests at the schema layer. Output filename encodes the ordered indices so the same merge is naturally idempotent. Frontend (Review.jsx) - Per-clip checkboxes in the sidebar; selection locks to a single job_id. - "Merge N selected" CTA appears in the export bar at ≥2 selections. - Confirmation modal with up/down reorder + remove; "Re-render" calls /api/merge and pushes the result into wizard.data.mergedClips for persistence across reloads. - New "Merged outputs" sidebar section previews merged files; stage selector, LUT picker, and before/after toggle are hidden while previewing a merge. Tests - backend/tests/unit/test_merge.py (8 cases) covers the normalize filter, concat arg composition, empty/single-input rejection, missing-file detection, and the "ffmpeg produced empty output" guard. 
- backend/tests/api/test_merge_endpoint.py (6 cases) covers HTTP-layer validation, dedup, and the happy-path response shape. - baseline.openapi.json regenerated to lock in the new route. - pytest 164/164 (up from 150), npm run build 0 warnings. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- backend/app/main.py | 90 +++++ backend/app/video/merge.py | 82 ++++ backend/tests/api/test_merge_endpoint.py | 129 +++++++ backend/tests/snapshots/baseline.openapi.json | 70 ++++ backend/tests/unit/test_merge.py | 126 ++++++ frontend/src/pages/ShortForm/steps/Review.jsx | 362 ++++++++++++++++-- 6 files changed, 827 insertions(+), 32 deletions(-) create mode 100644 backend/app/video/merge.py create mode 100644 backend/tests/api/test_merge_endpoint.py create mode 100644 backend/tests/unit/test_merge.py diff --git a/backend/app/main.py b/backend/app/main.py index 0e16e651..502d1130 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -1336,6 +1336,96 @@ async def silence_cut_clip(req: SilenceCutRequest): } +# --------------------------------------------------------------------------- +# Phase 4: merge multiple clips into a single MP4 via FFmpeg concat. 
+# --------------------------------------------------------------------------- + +ALLOWED_MERGE_TRANSITIONS = ("cut",) + + +class MergeRequest(BaseModel): + job_id: str + clip_indices: List[int] = Field(..., min_length=2, max_length=50) + use_processed: bool = True + transition: str = "cut" + + @field_validator("transition", mode="before") + @classmethod + def _check_transition(cls, v: Any) -> str: + s = str(v or "cut").strip().lower() + if s not in ALLOWED_MERGE_TRANSITIONS: + raise ValueError( + f"transition must be one of {sorted(ALLOWED_MERGE_TRANSITIONS)}" + ) + return s + + +from app.video.merge import concat_clips # noqa: E402 — kept near use site + + +@app.post("/api/merge") +async def merge_clips(req: MergeRequest): + """Concat the selected clips of a job into a single ``merged_*.mp4``.""" + if req.job_id not in jobs: + raise HTTPException(status_code=404, detail="Job not found") + job = jobs[req.job_id] + if 'result' not in job or 'clips' not in job['result']: + raise HTTPException(status_code=400, detail="Job result not available") + clips = job['result']['clips'] + clip_count = len(clips) + + # Dedup preserving first-occurrence order. 
+ seen = set() + ordered: List[int] = [] + for idx in req.clip_indices: + if idx < 0 or idx >= clip_count: + raise HTTPException( + status_code=400, + detail=f"clip_index {idx} out of range (0..{clip_count - 1})", + ) + if idx not in seen: + seen.add(idx) + ordered.append(idx) + if len(ordered) < 2: + raise HTTPException( + status_code=400, + detail="At least 2 distinct clip_indices are required to merge", + ) + + output_dir = os.path.join(OUTPUT_DIR, req.job_id) + input_paths: List[str] = [] + for idx in ordered: + url = clips[idx].get('video_url', '') + filename = url.split('/')[-1] if req.use_processed else f"_clip_{idx}.mp4" + if not filename: + raise HTTPException(status_code=400, detail=f"Clip {idx} has no source file") + path = os.path.join(output_dir, filename) + if not os.path.exists(path): + raise HTTPException(status_code=404, detail=f"Clip file not found: {filename}") + input_paths.append(path) + + out_filename = f"merged_{'_'.join(str(i) for i in ordered)}.mp4" + output_path = os.path.join(output_dir, out_filename) + + loop = asyncio.get_event_loop() + try: + await loop.run_in_executor( + None, + partial(concat_clips, input_paths, output_path), + ) + except FileNotFoundError as exc: + raise HTTPException(status_code=404, detail=str(exc)) + except Exception as exc: + print(f"❌ Merge Error: {exc}") + raise HTTPException(status_code=500, detail=str(exc)) + + return { + "success": True, + "clip_indices": ordered, + "new_video_url": f"/videos/{req.job_id}/{out_filename}", + } + + class HookRequest(BaseModel): job_id: str clip_index: int diff --git a/backend/app/video/merge.py b/backend/app/video/merge.py new file mode 100644 index 00000000..d3cc2a1e --- /dev/null +++ b/backend/app/video/merge.py @@ -0,0 +1,82 @@ +"""Concat multiple short-form clips into a single MP4 via FFmpeg filter_complex. + +Each input is normalized to 1080x1920@30fps + AAC 48 kHz stereo so the concat +filter can stitch sources whose resolution/fps/sample-rate diverge (e.g. 
mixing +a graded clip and a subtitled clip). All FFmpeg invocations funnel through +``app.video.ffmpeg`` per project convention. +""" + +from __future__ import annotations + +import os +from typing import List, Sequence + +from app.video import ffmpeg as ffmpeg_wrapper + + +# Letterbox-pad to 9:16 if source has a different aspect, force 30 fps + yuv420p +# so concat is happy regardless of input resolution/fps. +NORMALIZE_FILTER = ( + "scale=1080:1920:force_original_aspect_ratio=decrease," + "pad=1080:1920:(ow-iw)/2:(oh-ih)/2," + "fps=30,setsar=1,format=yuv420p" +) + + +def build_concat_args(input_paths: Sequence[str], output_path: str) -> List[str]: + """Build the ffmpeg argv (without the leading ``ffmpeg``) for a concat pass. + + Caller is responsible for path validation; this builder only composes args. + """ + args: List[str] = ["-y"] + for path in input_paths: + args.extend(["-i", path]) + + n = len(input_paths) + filter_parts: List[str] = [] + concat_inputs: List[str] = [] + for i in range(n): + filter_parts.append( + f"[{i}:v]{NORMALIZE_FILTER}[v{i}]" + ) + filter_parts.append( + f"[{i}:a]aresample=48000,aformat=channel_layouts=stereo[a{i}]" + ) + concat_inputs.append(f"[v{i}][a{i}]") + filter_parts.append( + f"{''.join(concat_inputs)}concat=n={n}:v=1:a=1[outv][outa]" + ) + + args.extend([ + "-filter_complex", ";".join(filter_parts), + "-map", "[outv]", + "-map", "[outa]", + "-c:v", "libx264", "-preset", "fast", "-crf", "20", + "-c:a", "aac", "-b:a", "128k", "-ar", "48000", "-ac", "2", + output_path, + ]) + return args + + +def concat_clips(input_paths: Sequence[str], output_path: str) -> str: + """Concat ``input_paths`` into a single MP4 at ``output_path``. + + Raises ``ValueError`` if fewer than 2 inputs are provided, + ``FileNotFoundError`` if any input is missing, ``FFmpegError`` if the + ffmpeg invocation fails, and ``RuntimeError`` if the resulting file is + missing or empty after a "successful" run. 
+ """ + if len(input_paths) < 2: + raise ValueError( + f"concat_clips needs at least 2 inputs (got {len(input_paths)})" + ) + for path in input_paths: + if not os.path.exists(path): + raise FileNotFoundError(f"Merge input not found: {path}") + + args = build_concat_args(list(input_paths), output_path) + ffmpeg_wrapper.run(args) + + if not os.path.exists(output_path) or os.path.getsize(output_path) == 0: + raise RuntimeError(f"Merge produced empty output: {output_path}") + return output_path diff --git a/backend/tests/api/test_merge_endpoint.py b/backend/tests/api/test_merge_endpoint.py new file mode 100644 index 00000000..8dc17531 --- /dev/null +++ b/backend/tests/api/test_merge_endpoint.py @@ -0,0 +1,129 @@ +"""Contract tests for POST /api/merge. + +Validates the request schema (bounds checks, dedup, transition allowlist) and +the integration with the in-memory job store. The actual ffmpeg invocation is +mocked at ``app.main.concat_clips`` so the test runs without ffmpeg. +""" +from __future__ import annotations + +import os +from pathlib import Path +from unittest.mock import patch + +import pytest + + +@pytest.fixture +def app_client(tmp_path, monkeypatch): + (tmp_path / "uploads").mkdir(exist_ok=True) + (tmp_path / "output").mkdir(exist_ok=True) + monkeypatch.chdir(tmp_path) + + from fastapi.testclient import TestClient + from app.main import app as fastapi_app, jobs + + # Seed a fake completed job with 3 clips.
+ job_id = "test-merge-job" + job_dir = tmp_path / "output" / job_id + job_dir.mkdir(parents=True, exist_ok=True) + clip_files = [] + for i in range(3): + p = job_dir / f"_clip_{i}.mp4" + p.write_bytes(b"fake clip data") + clip_files.append(p) + jobs[job_id] = { + "id": job_id, + "status": "completed", + "result": { + "clips": [ + {"video_url": f"/videos/{job_id}/_clip_{i}.mp4"} for i in range(3) + ], + }, + } + + with TestClient(fastapi_app) as client: + yield client, job_id, job_dir + + jobs.pop(job_id, None) + + +def test_merge_rejects_unknown_job(app_client): + client, _job_id, _ = app_client + r = client.post("/api/merge", json={ + "job_id": "ghost-job", + "clip_indices": [0, 1], + }) + assert r.status_code == 404 + + +def test_merge_rejects_out_of_bounds_clip_index(app_client): + client, job_id, _ = app_client + r = client.post("/api/merge", json={ + "job_id": job_id, + "clip_indices": [0, 99], + }) + assert r.status_code in (400, 422) + + +def test_merge_rejects_single_clip(app_client): + client, job_id, _ = app_client + r = client.post("/api/merge", json={ + "job_id": job_id, + "clip_indices": [0], + }) + assert r.status_code in (400, 422) + + +def test_merge_rejects_unknown_transition(app_client): + client, job_id, _ = app_client + r = client.post("/api/merge", json={ + "job_id": job_id, + "clip_indices": [0, 1], + "transition": "starfade", + }) + assert r.status_code in (400, 422) + + +def test_merge_dedupes_repeated_clip_indices(app_client): + client, job_id, job_dir = app_client + + captured = {} + + def fake_concat(inputs, output): + captured["inputs"] = list(inputs) + captured["output"] = output + Path(output).write_bytes(b"merged") + return output + + with patch("app.main.concat_clips", side_effect=fake_concat): + r = client.post("/api/merge", json={ + "job_id": job_id, + "clip_indices": [0, 0, 1, 1, 2], + }) + assert r.status_code == 200 + # Dedup preserves first occurrence order: [0, 1, 2]. 
+ assert len(captured["inputs"]) == 3 + assert os.path.basename(captured["inputs"][0]) == "_clip_0.mp4" + assert os.path.basename(captured["inputs"][1]) == "_clip_1.mp4" + assert os.path.basename(captured["inputs"][2]) == "_clip_2.mp4" + + +def test_merge_happy_path_returns_new_video_url(app_client): + client, job_id, job_dir = app_client + + def fake_concat(inputs, output): + Path(output).write_bytes(b"merged") + return output + + with patch("app.main.concat_clips", side_effect=fake_concat): + r = client.post("/api/merge", json={ + "job_id": job_id, + "clip_indices": [2, 0], + }) + assert r.status_code == 200 + body = r.json() + assert body["success"] is True + assert body["new_video_url"].startswith(f"/videos/{job_id}/merged_") + assert body["new_video_url"].endswith(".mp4") + # Filename encodes the user-picked order, not sorted: "merged_2_0.mp4". + assert "merged_2_0.mp4" in body["new_video_url"] diff --git a/backend/tests/snapshots/baseline.openapi.json b/backend/tests/snapshots/baseline.openapi.json index 38bdb744..acd9c37f 100644 --- a/backend/tests/snapshots/baseline.openapi.json +++ b/backend/tests/snapshots/baseline.openapi.json @@ -479,6 +479,39 @@ "title": "HookRequest", "type": "object" }, + "MergeRequest": { + "properties": { + "clip_indices": { + "items": { + "type": "integer" + }, + "maxItems": 50, + "minItems": 2, + "title": "Clip Indices", + "type": "array" + }, + "job_id": { + "title": "Job Id", + "type": "string" + }, + "transition": { + "default": "cut", + "title": "Transition", + "type": "string" + }, + "use_processed": { + "default": true, + "title": "Use Processed", + "type": "boolean" + } + }, + "required": [ + "job_id", + "clip_indices" + ], + "title": "MergeRequest", + "type": "object" + }, "SaaSActorRequest": { "properties": { "actor_description": { @@ -1435,6 +1468,43 @@ "summary": "Add Hook" } }, + "/api/merge": { + "post": { + "description": "Concat the selected clips of a job into a single ``merged_*.mp4``.", + "operationId": 
"merge_clips_api_merge_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MergeRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "schema": {} + } + }, + "description": "Successful Response" + }, + "422": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + }, + "description": "Validation Error" + } + }, + "summary": "Merge Clips" + } + }, "/api/process": { "post": { "operationId": "process_endpoint_api_process_post", diff --git a/backend/tests/unit/test_merge.py b/backend/tests/unit/test_merge.py new file mode 100644 index 00000000..f4805fa0 --- /dev/null +++ b/backend/tests/unit/test_merge.py @@ -0,0 +1,126 @@ +"""Tests for the Phase 4 merge helper. + +Covers the public surface of ``app.video.merge.concat_clips``: input +validation, filter-graph composition, output path derivation, and the +FFmpeg invocation contract (only via the wrapper). +""" +from __future__ import annotations + +from unittest.mock import patch + +import pytest + +from app.video.merge import ( + NORMALIZE_FILTER, + build_concat_args, + concat_clips, +) + + +def test_normalize_filter_targets_1080x1920_30fps(): + # Output normalized to 9:16 1080x1920 @ 30fps so concat can succeed + # regardless of source resolution / fps. + assert "scale=1080:1920" in NORMALIZE_FILTER + assert "fps=30" in NORMALIZE_FILTER + assert "setsar=1" in NORMALIZE_FILTER + assert "format=yuv420p" in NORMALIZE_FILTER + + +def test_build_concat_args_two_inputs(tmp_path): + a = tmp_path / "a.mp4" + b = tmp_path / "b.mp4" + a.write_bytes(b"x") + b.write_bytes(b"x") + out = tmp_path / "merged.mp4" + + args = build_concat_args([str(a), str(b)], str(out)) + + # Both inputs declared with -i. + assert args.count("-i") == 2 + # Filter graph references the normalize chain once per input, then concat. 
+ fc_index = args.index("-filter_complex") + fc = args[fc_index + 1] + assert "[0:v]" in fc and "[1:v]" in fc + assert "[0:a]" in fc and "[1:a]" in fc + assert "concat=n=2:v=1:a=1" in fc + # Audio re-encoded to AAC 48 kHz stereo for clean concat. + assert "-c:a" in args and args[args.index("-c:a") + 1] == "aac" + assert "-ar" in args and args[args.index("-ar") + 1] == "48000" + assert "-ac" in args and args[args.index("-ac") + 1] == "2" + # Video re-encoded with libx264. + assert "-c:v" in args and args[args.index("-c:v") + 1] == "libx264" + # Output path is the last positional argument. + assert args[-1] == str(out) + + +def test_build_concat_args_three_inputs_concat_n_matches(tmp_path): + paths = [] + for name in ("a.mp4", "b.mp4", "c.mp4"): + p = tmp_path / name + p.write_bytes(b"x") + paths.append(str(p)) + out = tmp_path / "merged.mp4" + + args = build_concat_args(paths, str(out)) + fc = args[args.index("-filter_complex") + 1] + assert "concat=n=3:v=1:a=1" in fc + for i in range(3): + assert f"[{i}:v]" in fc + assert f"[{i}:a]" in fc + + +def test_concat_clips_rejects_empty_list(tmp_path): + with pytest.raises(ValueError) as exc: + concat_clips([], str(tmp_path / "out.mp4")) + assert "at least" in str(exc.value).lower() + + +def test_concat_clips_rejects_single_input(tmp_path): + a = tmp_path / "a.mp4" + a.write_bytes(b"x") + with pytest.raises(ValueError) as exc: + concat_clips([str(a)], str(tmp_path / "out.mp4")) + assert "at least 2" in str(exc.value).lower() + + +def test_concat_clips_rejects_missing_input(tmp_path): + a = tmp_path / "a.mp4" + a.write_bytes(b"x") + missing = tmp_path / "ghost.mp4" + with pytest.raises(FileNotFoundError): + concat_clips([str(a), str(missing)], str(tmp_path / "out.mp4")) + + +def test_concat_clips_invokes_ffmpeg_with_expected_filter(tmp_path): + a = tmp_path / "a.mp4" + b = tmp_path / "b.mp4" + a.write_bytes(b"x") + b.write_bytes(b"x") + out = tmp_path / "merged.mp4" + + def fake_run(args, **_kwargs): + # Mimic a successful 
ffmpeg run by creating the output file. + out.write_bytes(b"merged") + return None + + with patch("app.video.merge.ffmpeg_wrapper.run", side_effect=fake_run) as run_mock: + result = concat_clips([str(a), str(b)], str(out)) + + assert result == str(out) + args = run_mock.call_args.args[0] + fc = args[args.index("-filter_complex") + 1] + assert "concat=n=2:v=1:a=1" in fc + + +def test_concat_clips_raises_when_ffmpeg_produces_empty_output(tmp_path): + a = tmp_path / "a.mp4" + b = tmp_path / "b.mp4" + a.write_bytes(b"x") + b.write_bytes(b"x") + out = tmp_path / "merged.mp4" + + with patch("app.video.merge.ffmpeg_wrapper.run") as run_mock: + run_mock.return_value = None + with pytest.raises(RuntimeError) as exc: + concat_clips([str(a), str(b)], str(out)) + assert "empty output" in str(exc.value) diff --git a/frontend/src/pages/ShortForm/steps/Review.jsx b/frontend/src/pages/ShortForm/steps/Review.jsx index 4071210e..6799eae2 100644 --- a/frontend/src/pages/ShortForm/steps/Review.jsx +++ b/frontend/src/pages/ShortForm/steps/Review.jsx @@ -15,7 +15,7 @@ // - Send to CapCut: placeholder — backend integration TODO. 
import { useEffect, useMemo, useState } from 'react'; -import { Download, Eye, Loader2, Plus, Scissors } from 'lucide-react'; +import { ArrowDown, ArrowUp, Combine, Download, Eye, Loader2, Plus, Scissors, X } from 'lucide-react'; import PhoneFrame from '../../../components/ui/PhoneFrame.jsx'; import PlatformBadge from '../../../components/ui/PlatformBadge.jsx'; import { getApiUrl } from '../../../config'; @@ -87,7 +87,9 @@ export default function Review({ wizard }) { const files = wizard.data.files || []; const jobs = wizard.data.jobs || {}; const clips = useMemo(() => flattenClips(jobs, files), [jobs, files]); + const mergedClips = wizard.data.mergedClips || []; const [selected, setSelected] = useState(0); + const [selectedMergedId, setSelectedMergedId] = useState(null); const [showOriginal, setShowOriginal] = useState(false); const [sourceUrl, setSourceUrl] = useState(null); @@ -95,6 +97,13 @@ export default function Review({ wizard }) { const [pendingStage, setPendingStage] = useState(null); // { clipKey, stageKey } const [stageError, setStageError] = useState(null); // { clipKey, message } + // Merge UI state. + const [mergeChecked, setMergeChecked] = useState({}); // clipKey -> bool + const [mergeModalOpen, setMergeModalOpen] = useState(false); + const [modalOrder, setModalOrder] = useState([]); // clipKey[] + const [merging, setMerging] = useState(false); + const [mergeError, setMergeError] = useState(null); + const keys = useKeys(); const brand = useBrandKit(); @@ -115,9 +124,14 @@ export default function Review({ wizard }) { // deepest existing variant, then to the polished URL the backend set, then // to the raw clip URL (covers legacy clips without a variants dict). const stageFilename = variants?.[selectedStage] || variants?.polished || null; - const clipUrl = stageFilename + const baseClipUrl = stageFilename ? getApiUrl(`/videos/${current.jobId}/${stageFilename}`) : (current?.clip?.video_url ? 
getApiUrl(current.clip.video_url) : null); + // Merged previews override everything: a merged output has no variants. + const activeMergedClip = selectedMergedId + ? (wizard.data.mergedClips || []).find((m) => m.id === selectedMergedId) + : null; + const clipUrl = activeMergedClip ? getApiUrl(activeMergedClip.url) : baseClipUrl; // Build a blob URL for the original source file — only available when // the wizard has the in-memory File (lost after reload). @@ -300,6 +314,115 @@ export default function Review({ wizard }) { } } + // --- Merge helpers -------------------------------------------------------- + // Restrict selection to a single job's clips so /api/merge (which takes one + // job_id) stays valid. Once a clip is checked, lock the candidate set. + const checkedClipKeys = Object.keys(mergeChecked).filter((k) => mergeChecked[k]); + const lockedJobId = checkedClipKeys.length > 0 + ? clips.find((c) => clipKey(c) === checkedClipKeys[0])?.jobId + : null; + + function isMergeable(c) { + return !lockedJobId || c.jobId === lockedJobId; + } + + function toggleMergeCheck(c) { + const k = clipKey(c); + setMergeChecked((prev) => { + const next = { ...prev }; + if (next[k]) delete next[k]; + else next[k] = true; + return next; + }); + } + + function clearMergeSelection() { + setMergeChecked({}); + setMergeError(null); + } + + function openMergeModal() { + const ordered = clips + .filter((c) => mergeChecked[clipKey(c)]) + .map((c) => clipKey(c)); + setModalOrder(ordered); + setMergeError(null); + setMergeModalOpen(true); + } + + function reorderModal(fromIdx, toIdx) { + setModalOrder((prev) => { + if (toIdx < 0 || toIdx >= prev.length) return prev; + const next = [...prev]; + const [moved] = next.splice(fromIdx, 1); + next.splice(toIdx, 0, moved); + return next; + }); + } + + function removeFromModal(k) { + setModalOrder((prev) => prev.filter((x) => x !== k)); + setMergeChecked((prev) => { + const next = { ...prev }; + delete next[k]; + return next; + }); + } + + async 
function submitMerge() { + if (modalOrder.length < 2) { + setMergeError('Need at least 2 clips to merge'); + return; + } + const ordered = modalOrder + .map((k) => clips.find((c) => clipKey(c) === k)) + .filter(Boolean); + if (ordered.length < 2) { + setMergeError('Selected clips no longer available'); + return; + } + const jobId = ordered[0].jobId; + const indices = ordered.map((c) => c.clipIndex); + setMerging(true); + setMergeError(null); + try { + const res = await fetch(getApiUrl('/api/merge'), { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + job_id: jobId, + clip_indices: indices, + use_processed: true, + transition: 'cut', + }), + }); + if (!res.ok) throw new Error(await res.text()); + const data = await res.json(); + const url = data.new_video_url; + if (!url) throw new Error('Empty response from backend'); + const merged = { + id: `m-${jobId}-${indices.join('_')}-${Date.now()}`, + jobId, + indices, + url, + label: `Merged ${indices.map((i) => `#${i + 1}`).join(' + ')}`, + createdAt: Date.now(), + }; + wizard.setData((prev) => ({ + ...prev, + mergedClips: [...(prev.mergedClips || []), merged], + })); + setSelectedMergedId(merged.id); + setMergeModalOpen(false); + setMergeChecked({}); + setModalOrder([]); + } catch (e) { + setMergeError(String(e.message || e)); + } finally { + setMerging(false); + } + } + function publish(platform, scheduled) { if (!current) return; pushNotification({ @@ -332,45 +455,94 @@ export default function Review({ wizard }) { {clips.length} clip{clips.length === 1 ? 
'' : 's'} </div> {clips.map((c, i) => { - const active = i === selected; + const active = i === selected && !selectedMergedId; const clipTitle = c.clip?.video_title_for_youtube_short || c.clip?.title; + const k = clipKey(c); + const checked = !!mergeChecked[k]; + const mergeable = isMergeable(c); return ( - <button - key={`${c.jobId}-${c.clipIndex}`} - onClick={() => { setSelected(i); setShowOriginal(false); }} - className={`w-full text-left rounded-lg p-2 transition-colors ${ + <div + key={k} + className={`w-full rounded-lg p-2 transition-colors flex items-start gap-2 ${ active ? 'bg-primary/15 border border-primary/30' : 'border border-transparent hover:bg-white/5' }`} > - <div className={`text-[12px] font-medium truncate ${active ? 'text-white' : 'text-zinc-300'}`}> - Clip {i + 1} - </div> - <div className="text-[10px] text-zinc-500 truncate mt-0.5">{c.sourceName}</div> - {clipTitle && ( - <div className="text-[10px] text-zinc-400 truncate mt-1 italic">"{clipTitle}"</div> - )} - </button> + <input + type="checkbox" + aria-label={`Select clip ${i + 1} to merge`} + checked={checked} + disabled={!mergeable && !checked} + onChange={() => toggleMergeCheck(c)} + title={mergeable ? 'Include in merge' : 'Merge only works within one source video'} + className="mt-0.5 accent-primary disabled:opacity-30 cursor-pointer disabled:cursor-not-allowed" + onClick={(e) => e.stopPropagation()} + /> + <button + type="button" + onClick={() => { setSelected(i); setSelectedMergedId(null); setShowOriginal(false); }} + className="flex-1 text-left min-w-0" + > + <div className={`text-[12px] font-medium truncate ${active ? 
'text-white' : 'text-zinc-300'}`}> + Clip {i + 1} + </div> + <div className="text-[10px] text-zinc-500 truncate mt-0.5">{c.sourceName}</div> + {clipTitle && ( + <div className="text-[10px] text-zinc-400 truncate mt-1 italic">"{clipTitle}"</div> + )} + </button> + </div> ); })} + + {mergedClips.length > 0 && ( + <div className="pt-3 mt-2 border-t border-border space-y-1"> + <div className="text-[11px] uppercase tracking-wider text-zinc-500 px-2 mb-1"> + Merged outputs + </div> + {mergedClips.map((m) => { + const active = selectedMergedId === m.id; + return ( + <button + key={m.id} + type="button" + onClick={() => { setSelectedMergedId(m.id); setShowOriginal(false); }} + className={`w-full text-left rounded-lg p-2 transition-colors flex items-center gap-2 ${ + active ? 'bg-primary/15 border border-primary/30' : 'border border-transparent hover:bg-white/5' + }`} + > + <Combine size={12} className="text-zinc-400 shrink-0" /> + <div className="min-w-0 flex-1"> + <div className={`text-[12px] font-medium truncate ${active ? 'text-white' : 'text-zinc-300'}`}> + {m.label} + </div> + <div className="text-[10px] text-zinc-500 truncate">{m.indices.length} clips</div> + </div> + </button> + ); + })} + </div> + )} </aside> <div className="flex-1 flex flex-col overflow-hidden"> <div className="flex-1 overflow-y-auto custom-scrollbar p-6 flex flex-col items-center gap-4"> - <div className="flex items-center gap-2 text-[12px]"> - <button - onClick={() => setShowOriginal(false)} - className={`px-3 py-1.5 rounded-md ${!showOriginal ? 'bg-white/10 text-white' : 'text-zinc-400 hover:text-white'}`} - > - After - </button> - <button - onClick={() => setShowOriginal(true)} - disabled={!sourceUrl} - className={`px-3 py-1.5 rounded-md disabled:opacity-30 disabled:cursor-not-allowed ${showOriginal ? 
'bg-white/10 text-white' : 'text-zinc-400 hover:text-white'}`} - > - <Eye size={12} className="inline mr-1" /> Before - </button> - </div> + {!activeMergedClip && ( + <div className="flex items-center gap-2 text-[12px]"> + <button + onClick={() => setShowOriginal(false)} + className={`px-3 py-1.5 rounded-md ${!showOriginal ? 'bg-white/10 text-white' : 'text-zinc-400 hover:text-white'}`} + > + After + </button> + <button + onClick={() => setShowOriginal(true)} + disabled={!sourceUrl} + className={`px-3 py-1.5 rounded-md disabled:opacity-30 disabled:cursor-not-allowed ${showOriginal ? 'bg-white/10 text-white' : 'text-zinc-400 hover:text-white'}`} + > + <Eye size={12} className="inline mr-1" /> Before + </button> + </div> + )} <PhoneFrame size="md"> {showOriginal && sourceUrl ? ( @@ -382,10 +554,16 @@ export default function Review({ wizard }) { )} </PhoneFrame> + {activeMergedClip && ( + <div className="text-[11px] text-zinc-400 max-w-md text-center" role="status"> + Preview of the merged output — no further editing available. Download from the bar below. + </div> + )} + {/* Stage selector — segmented row. Lights up the currently-displayed variant and exposes a [+] on each missing stage so the user can fill it in without leaving Review. 
*/} - {!showOriginal && current && ( + {!showOriginal && current && !activeMergedClip && ( <div className="flex flex-col items-center gap-2"> <div className="inline-flex rounded-lg border border-border bg-surface p-0.5 text-[12px]"> {STAGES.map((stage) => { @@ -448,7 +626,7 @@ export default function Review({ wizard }) { </div> )} - {title && ( + {title && !activeMergedClip && ( <div className="text-center max-w-md"> <div className="text-[13px] text-white font-medium">{title}</div> {description && ( @@ -459,6 +637,28 @@ export default function Review({ wizard }) { </div> <div className="border-t border-border bg-surface px-4 py-3 flex flex-wrap items-center gap-3 shrink-0"> + {checkedClipKeys.length >= 2 && ( + <button + type="button" + onClick={openMergeModal} + className="btn-primary px-3 py-2 text-[12px] flex items-center gap-2 bg-emerald-600 hover:bg-emerald-500" + title="Stitch the selected clips into one MP4" + > + <Combine size={12} /> Merge {checkedClipKeys.length} selected + </button> + )} + {checkedClipKeys.length > 0 && checkedClipKeys.length < 2 && ( + <span className="text-[11px] text-zinc-500">Select at least 2 clips to merge</span> + )} + {checkedClipKeys.length > 0 && ( + <button + type="button" + onClick={clearMergeSelection} + className="text-[11px] text-zinc-400 hover:text-white underline-offset-2 hover:underline" + > + Clear + </button> + )} <a href={clipUrl || '#'} download @@ -491,6 +691,104 @@ export default function Review({ wizard }) { </button> </div> </div> + + {mergeModalOpen && ( + <div + className="fixed inset-0 z-50 bg-black/70 flex items-center justify-center p-6" + role="dialog" + aria-modal="true" + aria-label="Confirm merge order" + onClick={() => !merging && setMergeModalOpen(false)} + > + <div + className="w-full max-w-md bg-surface border border-border rounded-xl p-5 space-y-4" + onClick={(e) => e.stopPropagation()} + > + <div> + <h2 className="text-[14px] text-white font-semibold flex items-center gap-2"> + <Combine size={14} 
/> Merge clips + </h2> + <p className="text-[11px] text-zinc-500 mt-1"> + Reorder, remove, then re-render. The merged file appears under "Merged outputs" in the sidebar. + </p> + </div> + + <ul className="space-y-1.5"> + {modalOrder.map((k, idx) => { + const c = clips.find((x) => clipKey(x) === k); + if (!c) return null; + const title = c.clip?.video_title_for_youtube_short || c.clip?.title; + return ( + <li + key={k} + className="flex items-center gap-2 bg-background border border-border rounded-md p-2" + > + <div className="text-[11px] text-zinc-500 w-5 text-right">{idx + 1}.</div> + <div className="flex-1 min-w-0"> + <div className="text-[12px] text-white truncate">Clip {c.clipIndex + 1}</div> + {title && <div className="text-[10px] text-zinc-500 truncate italic">"{title}"</div>} + </div> + <button + type="button" + aria-label="Move up" + disabled={idx === 0 || merging} + onClick={() => reorderModal(idx, idx - 1)} + className="p-1 text-zinc-400 hover:text-white disabled:opacity-20 disabled:cursor-not-allowed" + > + <ArrowUp size={12} /> + </button> + <button + type="button" + aria-label="Move down" + disabled={idx === modalOrder.length - 1 || merging} + onClick={() => reorderModal(idx, idx + 1)} + className="p-1 text-zinc-400 hover:text-white disabled:opacity-20 disabled:cursor-not-allowed" + > + <ArrowDown size={12} /> + </button> + <button + type="button" + aria-label="Remove" + disabled={merging} + onClick={() => removeFromModal(k)} + className="p-1 text-zinc-400 hover:text-red-400 disabled:opacity-20" + > + <X size={12} /> + </button> + </li> + ); + })} + </ul> + + {mergeError && ( + <div className="text-[11px] text-red-400" role="alert">{mergeError}</div> + )} + + <div className="flex items-center justify-end gap-2 pt-1"> + <button + type="button" + onClick={() => setMergeModalOpen(false)} + disabled={merging} + className="px-3 py-1.5 text-[12px] text-zinc-400 hover:text-white" + > + Cancel + </button> + <button + type="button" + onClick={submitMerge} + 
disabled={merging || modalOrder.length < 2} + className="btn-primary px-3 py-1.5 text-[12px] flex items-center gap-2 disabled:opacity-50" + > + {merging ? ( + <><Loader2 size={12} className="animate-spin" /> Re-rendering…</> + ) : ( + <>Re-render</> + )} + </button> + </div> + </div> + </div> + )} </div> ); } From 60966323ce20cd62acc6617f866040af63f253c9 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Wed, 20 May 2026 15:17:12 -0400 Subject: [PATCH 38/43] =?UTF-8?q?fix(short-form):=20Phase=205=20=E2=80=94?= =?UTF-8?q?=20atomic=20rename=20for=20/api/merge=20output?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex audit flagged a real race in the Phase 4 merge helper: two clients POSTing /api/merge with the same clip_indices both write to the same deterministic `merged_{indices}.mp4` filename with ffmpeg's `-y` flag. A reader hitting `/videos/{job_id}/merged_*.mp4` between writer-A start and writer-B finish could see a partial / mid-write file. Fix: concat_clips writes to `{output}.partial-{nonce}.mp4`, then os.replace()s onto the public path. Idempotency on the URL is preserved (filename stable), but the public path is never half-written: readers see the prior file or the new file, never a partial. On ffmpeg failure the partial is cleaned up; the stable output stays intact. Tests: 3 new unit cases (partial path used, cleanup on failure, unique partials across calls) + 3 new API cases (negative-index reject, case- normalized transition, concurrent identical merges converge on same public path). Suite: 170/170 (up from 164). 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- backend/app/video/merge.py | 24 +++-- backend/tests/api/test_merge_endpoint.py | 64 ++++++++++++++ backend/tests/unit/test_merge.py | 106 ++++++++++++++++++++++- 3 files changed, 185 insertions(+), 9 deletions(-) diff --git a/backend/app/video/merge.py b/backend/app/video/merge.py index d3cc2a1e..808dbca8 100644 --- a/backend/app/video/merge.py +++ b/backend/app/video/merge.py @@ -8,7 +8,9 @@ from __future__ import annotations +import contextlib import os +import secrets from typing import List, Sequence from app.video import ffmpeg as ffmpeg_wrapper @@ -61,6 +63,12 @@ def build_concat_args(input_paths: Sequence[str], output_path: str) -> List[str] def concat_clips(input_paths: Sequence[str], output_path: str) -> str: """Concat ``input_paths`` into a single MP4 at ``output_path``. + Writes to a unique ``{output_path}.partial-{nonce}.mp4`` file first, then + ``os.replace()``s it onto ``output_path``. Two concurrent merges with the + same indices (the public URL is filename-as-idempotency-key) get distinct + partial paths, so neither clobbers the other mid-write; readers see the + pre-merge file or the post-merge file, never a partial. 
+ Raises ``ValueError`` if fewer than 2 inputs are provided, ``FileNotFoundError`` if any input is missing, ``FFmpegError`` if the ffmpeg invocation fails, and ``RuntimeError`` if the resulting file is @@ -74,9 +82,15 @@ def concat_clips(input_paths: Sequence[str], output_path: str) -> str: if not os.path.exists(path): raise FileNotFoundError(f"Merge input not found: {path}") - args = build_concat_args(list(input_paths), output_path) - ffmpeg_wrapper.run(args) - - if not os.path.exists(output_path) or os.path.getsize(output_path) == 0: - raise RuntimeError(f"Merge produced empty output: {output_path}") + partial_path = f"{output_path}.partial-{secrets.token_hex(6)}.mp4" + args = build_concat_args(list(input_paths), partial_path) + try: + ffmpeg_wrapper.run(args) + if not os.path.exists(partial_path) or os.path.getsize(partial_path) == 0: + raise RuntimeError(f"Merge produced empty output: {output_path}") + os.replace(partial_path, output_path) + except Exception: + with contextlib.suppress(OSError): + os.remove(partial_path) + raise return output_path diff --git a/backend/tests/api/test_merge_endpoint.py b/backend/tests/api/test_merge_endpoint.py index 8dc17531..57a68d08 100644 --- a/backend/tests/api/test_merge_endpoint.py +++ b/backend/tests/api/test_merge_endpoint.py @@ -65,6 +65,70 @@ def test_merge_rejects_out_of_bounds_clip_index(app_client): assert r.status_code in (400, 422) +def test_merge_rejects_negative_clip_index(app_client): + """Negative indices must be rejected at the route boundary. + + Defense-in-depth alongside Pydantic: even though clip_count = 3, a + `clip_index=-1` would index `clips[-1]` (the last clip) silently. + The route's `idx < 0` check rejects it with 400. Covers Codex + test_merge_endpoint:59 gap. 
+ """ + client, job_id, _ = app_client + r = client.post("/api/merge", json={ + "job_id": job_id, + "clip_indices": [-1, 0], + }) + assert r.status_code in (400, 422) + + +def test_merge_normalizes_transition_case(app_client): + """field_validator strips + lowercases transition; ' CUT ' is accepted.""" + client, job_id, _ = app_client + + def fake_concat(inputs, output): + Path(output).write_bytes(b"merged") + return output + + with patch("app.main.concat_clips", side_effect=fake_concat): + r = client.post("/api/merge", json={ + "job_id": job_id, + "clip_indices": [0, 1], + "transition": " CUT ", + }) + assert r.status_code == 200, r.text + + +def test_merge_atomic_rename_uses_unique_partial_paths(app_client): + """Concurrent identical merges write to unique partial paths. + + Codex flagged test_merge_endpoint:111 (no concurrent-clobber coverage). + We verify the contract: concat_clips receives a final-path output and + handles partial-rename internally; the route hands it the public path + and trusts the helper to atomically swap. This guards the route-helper + contract, not the helper internals (those are in test_merge.py). + """ + client, job_id, _ = app_client + seen_outputs: list[str] = [] + + def fake_concat(inputs, output): + seen_outputs.append(output) + Path(output).write_bytes(b"merged") + return output + + with patch("app.main.concat_clips", side_effect=fake_concat): + r1 = client.post("/api/merge", json={ + "job_id": job_id, "clip_indices": [0, 1], + }) + r2 = client.post("/api/merge", json={ + "job_id": job_id, "clip_indices": [0, 1], + }) + assert r1.status_code == 200 and r2.status_code == 200 + # Both calls converge on the same idempotency-key filename — the helper + # is responsible for unique partials underneath. 
+ assert seen_outputs[0] == seen_outputs[1] + assert seen_outputs[0].endswith("merged_0_1.mp4") + + def test_merge_rejects_single_clip(app_client): client, job_id, _ = app_client r = client.post("/api/merge", json={ diff --git a/backend/tests/unit/test_merge.py b/backend/tests/unit/test_merge.py index f4805fa0..f8b7e9b3 100644 --- a/backend/tests/unit/test_merge.py +++ b/backend/tests/unit/test_merge.py @@ -1,11 +1,14 @@ """Tests for the Phase 4 merge helper. Covers the public surface of ``app.video.merge.concat_clips``: input -validation, filter-graph composition, output path derivation, and the -FFmpeg invocation contract (only via the wrapper). +validation, filter-graph composition, output path derivation, the FFmpeg +invocation contract (only via the wrapper), and the atomic-rename safety +net that prevents concurrent-identical-merge clobbering. """ from __future__ import annotations +import os +from pathlib import Path from unittest.mock import patch import pytest @@ -99,8 +102,10 @@ def test_concat_clips_invokes_ffmpeg_with_expected_filter(tmp_path): out = tmp_path / "merged.mp4" def fake_run(args, **_kwargs): - # Mimic a successful ffmpeg run by creating the output file. - out.write_bytes(b"merged") + # ffmpeg's positional out path is the last argv element; atomic-rename + # passes a `.partial-*.mp4` path, not the public `out`. + actual_out = args[-1] + Path(actual_out).write_bytes(b"merged") return None with patch("app.video.merge.ffmpeg_wrapper.run", side_effect=fake_run) as run_mock: @@ -124,3 +129,96 @@ def test_concat_clips_raises_when_ffmpeg_produces_empty_output(tmp_path): with pytest.raises(RuntimeError) as exc: concat_clips([str(a), str(b)], str(out)) assert "empty output" in str(exc.value) + + +# --------------------------------------------------------------------------- +# Atomic-rename safety net (Phase 5 fix for concurrent-identical-merge race). 
+# --------------------------------------------------------------------------- + +def test_concat_clips_writes_to_partial_then_renames(tmp_path): + """ffmpeg writes to a `.partial-*.mp4` path; final `out` appears via rename. + + Prevents partial-read races when two clients POST the same indices: each + merge writes to a unique partial path, then atomic-renames to the stable + public URL. The reader either sees the old file or the new file, never + a mid-write file. + """ + a = tmp_path / "a.mp4" + b = tmp_path / "b.mp4" + a.write_bytes(b"x") + b.write_bytes(b"x") + out = tmp_path / "merged.mp4" + seen_partial_paths: list[str] = [] + + def fake_run(args, **_kwargs): + actual_out = args[-1] + seen_partial_paths.append(actual_out) + # Final public path must not exist during ffmpeg run. + assert not out.exists(), "final path was written to before rename" + assert ".partial-" in actual_out, f"expected partial path, got {actual_out}" + Path(actual_out).write_bytes(b"merged") + return None + + with patch("app.video.merge.ffmpeg_wrapper.run", side_effect=fake_run): + result = concat_clips([str(a), str(b)], str(out)) + + assert result == str(out) + # After concat_clips returns, public path exists, partial does not. 
+ assert out.exists() + assert out.read_bytes() == b"merged" + for partial in seen_partial_paths: + assert not os.path.exists(partial), f"partial {partial} not cleaned up" + + +def test_concat_clips_cleans_up_partial_on_ffmpeg_failure(tmp_path): + """If ffmpeg raises, the partial file is removed and `out` is not touched.""" + from app.video.ffmpeg import FFmpegError + a = tmp_path / "a.mp4" + b = tmp_path / "b.mp4" + a.write_bytes(b"x") + b.write_bytes(b"x") + out = tmp_path / "merged.mp4" + out.write_bytes(b"existing-stable-output") # pretend a prior merge succeeded + seen_partial_paths: list[str] = [] + + def fake_run(args, **_kwargs): + actual_out = args[-1] + seen_partial_paths.append(actual_out) + Path(actual_out).write_bytes(b"corrupt-mid-write") + raise FFmpegError(1, b"simulated ffmpeg crash", args) + + with patch("app.video.merge.ffmpeg_wrapper.run", side_effect=fake_run): + with pytest.raises(FFmpegError): + concat_clips([str(a), str(b)], str(out)) + + # Stable output preserved; corrupt partial cleaned up. + assert out.read_bytes() == b"existing-stable-output" + for partial in seen_partial_paths: + assert not os.path.exists(partial), f"partial {partial} not cleaned up" + + +def test_concat_clips_partial_paths_are_unique_across_calls(tmp_path): + """Two back-to-back merges with the same final path use different partials. + + Simulates the concurrent-merge case: with unique nonces, neither writer + clobbers the other mid-flight. 
+ """ + a = tmp_path / "a.mp4" + b = tmp_path / "b.mp4" + a.write_bytes(b"x") + b.write_bytes(b"x") + out = tmp_path / "merged.mp4" + seen_partials: list[str] = [] + + def fake_run(args, **_kwargs): + actual_out = args[-1] + seen_partials.append(actual_out) + Path(actual_out).write_bytes(b"merged") + return None + + with patch("app.video.merge.ffmpeg_wrapper.run", side_effect=fake_run): + concat_clips([str(a), str(b)], str(out)) + concat_clips([str(a), str(b)], str(out)) + + assert len(seen_partials) == 2 + assert seen_partials[0] != seen_partials[1], "partial paths must be unique" From 12f07f10b7eaeec8e0688678cbabefd082ba44f9 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Wed, 20 May 2026 15:25:44 -0400 Subject: [PATCH 39/43] =?UTF-8?q?fix(short-form):=20Phase=205=20=E2=80=94?= =?UTF-8?q?=20reject=20negative=20clip=5Findex=20in=205=20legacy=20per-cli?= =?UTF-8?q?p=20routes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex audit found 5 routes that index `clips[req.clip_index]` with only a `>= len(clips)` check, so a request with `clip_index=-1` would silently mutate the LAST clip: - /api/edit (main.py:776, no bounds check at all) - /api/effects/generate (main.py:993, no bounds check at all) - /api/clip/.../transcript (main.py:910, only >=) - /api/subtitle (main.py:1097, only >=) - /api/hook (main.py:1453, only >=) Phase-2 routes (/api/colorgrade, /api/silencecut) already go through `_resolve_clip_input` which validates negatives; /api/merge has its own explicit `idx < 0` check. This brings the legacy surface to parity. Also defends `_persist_clip_url` and the inline persistence in /api/subtitle (L1175/1180/1261/1270) with `0 <= idx < len` — even though the route entries now block negatives, helpers should not assume callers have validated. Tests: 5 new contract cases in test_legacy_negative_clip_index.py — one per affected route, asserting 400/404/422 for `clip_index=-1`. 
Before the route guards these tests hang because the routes fall through to real Gemini/FFmpeg/Whisper work on the last clip (which has fake bytes). Suite: 175/175 (up from 170). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- backend/app/main.py | 32 ++--- .../api/test_legacy_negative_clip_index.py | 114 ++++++++++++++++++ 2 files changed, 132 insertions(+), 14 deletions(-) create mode 100644 backend/tests/api/test_legacy_negative_clip_index.py diff --git a/backend/app/main.py b/backend/app/main.py index 502d1130..3fd2b817 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -759,11 +759,13 @@ async def edit_clip( if req.job_id not in jobs: raise HTTPException(status_code=404, detail="Job not found") - + job = jobs[req.job_id] if 'result' not in job or 'clips' not in job['result']: raise HTTPException(status_code=400, detail="Job result not available") - + if req.clip_index < 0 or req.clip_index >= len(job['result']['clips']): + raise HTTPException(status_code=404, detail="Clip not found") + try: # Resolve Input Path: Prefer explict input_filename from frontend (chaining edits) if req.input_filename: @@ -907,7 +909,7 @@ async def get_clip_transcript(job_id: str, clip_index: int): raise HTTPException(status_code=400, detail="Transcript not found in metadata") clips = data.get('shorts', []) - if clip_index >= len(clips): + if clip_index < 0 or clip_index >= len(clips): raise HTTPException(status_code=404, detail="Clip not found") clip_data = clips[clip_index] @@ -983,6 +985,8 @@ async def generate_effects_config( job = jobs[req.job_id] if 'result' not in job or 'clips' not in job['result']: raise HTTPException(status_code=400, detail="Job result not available") + if req.clip_index < 0 or req.clip_index >= len(job['result']['clips']): + raise HTTPException(status_code=404, detail="Clip not found") try: # Resolve input path @@ -1094,11 +1098,11 @@ async def add_subtitles(req: SubtitleRequest): raise HTTPException(status_code=400, 
detail="Transcript not found in metadata. Please process a new video.") clips = data.get('shorts', []) - if req.clip_index >= len(clips): + if req.clip_index < 0 or req.clip_index >= len(clips): raise HTTPException(status_code=404, detail="Clip not found") - + clip_data = clips[req.clip_index] - + # Video Path if req.input_filename: # Use chained file @@ -1168,12 +1172,12 @@ def run_burn(): # 3. Update Result and Metadata # Update InMemory Jobs - if req.clip_index < len(job['result']['clips']): + if 0 <= req.clip_index < len(job['result']['clips']): job['result']['clips'][req.clip_index]['video_url'] = f"/videos/{req.job_id}/{output_filename}" - + # Update Metadata on Disk (Persistence) try: - if req.clip_index < len(clips): + if 0 <= req.clip_index < len(clips): clips[req.clip_index]['video_url'] = f"/videos/{req.job_id}/{output_filename}" # Update the main data structure data['shorts'] = clips @@ -1254,7 +1258,7 @@ def _persist_clip_url(job_id: str, clip_index: int, new_filename: str) -> None: """Write the new clip URL back to in-memory jobs[] and to metadata.json.""" new_url = f"/videos/{job_id}/{new_filename}" job = jobs.get(job_id) - if job and clip_index < len(job['result']['clips']): + if job and 0 <= clip_index < len(job['result']['clips']): job['result']['clips'][clip_index]['video_url'] = new_url try: @@ -1263,7 +1267,7 @@ def _persist_clip_url(job_id: str, clip_index: int, new_filename: str) -> None: with open(json_files[0], 'r') as f: data = json.load(f) clips = data.get('shorts', []) - if clip_index < len(clips): + if 0 <= clip_index < len(clips): clips[clip_index]['video_url'] = new_url data['shorts'] = clips with open(json_files[0], 'w') as f: @@ -1450,11 +1454,11 @@ async def add_hook(req: HookRequest): data = json.load(f) clips = data.get('shorts', []) - if req.clip_index >= len(clips): + if req.clip_index < 0 or req.clip_index >= len(clips): raise HTTPException(status_code=404, detail="Clip not found") - + clip_data = clips[req.clip_index] - + # 
Video Path if req.input_filename: filename = os.path.basename(req.input_filename) diff --git a/backend/tests/api/test_legacy_negative_clip_index.py b/backend/tests/api/test_legacy_negative_clip_index.py new file mode 100644 index 00000000..ee35c403 --- /dev/null +++ b/backend/tests/api/test_legacy_negative_clip_index.py @@ -0,0 +1,114 @@ +"""Contract tests: legacy per-clip routes reject negative clip_index. + +Codex Phase 5 audit found 5 routes that index ``clips[req.clip_index]`` with +only a ``>= len(clips)`` check, so ``clip_index=-1`` would silently mutate +the *last* clip. /api/colorgrade and /api/silencecut already use +``_resolve_clip_input`` which rejects negatives; /api/merge has its own +explicit ``idx < 0`` check. These tests cover the remaining surface. +""" +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + + +@pytest.fixture +def app_client(tmp_path, monkeypatch): + """Seed a job with 2 clips + a metadata.json so routes that read from + disk find a transcript.""" + (tmp_path / "uploads").mkdir(exist_ok=True) + (tmp_path / "output").mkdir(exist_ok=True) + monkeypatch.chdir(tmp_path) + + from fastapi.testclient import TestClient + from app.main import app as fastapi_app, jobs + + job_id = "neg-idx-job" + job_dir = tmp_path / "output" / job_id + job_dir.mkdir(parents=True, exist_ok=True) + + # Seed clip files + metadata.json (read by /api/subtitle, /api/hook, + # /api/clip/.../transcript). 
+ for i in range(2): + (job_dir / f"_clip_{i}.mp4").write_bytes(b"fake clip") + metadata = { + "transcript": { + "segments": [{ + "words": [ + {"start": 0.0, "end": 1.0, "word": "hi"}, + ], + }], + }, + "shorts": [ + {"start": 0.0, "end": 5.0, "video_url": f"/videos/{job_id}/_clip_0.mp4"}, + {"start": 5.0, "end": 10.0, "video_url": f"/videos/{job_id}/_clip_1.mp4"}, + ], + } + (job_dir / "test_metadata.json").write_text(json.dumps(metadata)) + + jobs[job_id] = { + "id": job_id, + "status": "completed", + "result": { + "clips": [ + {"video_url": f"/videos/{job_id}/_clip_0.mp4"}, + {"video_url": f"/videos/{job_id}/_clip_1.mp4"}, + ], + }, + } + + with TestClient(fastapi_app) as client: + yield client, job_id, job_dir + + jobs.pop(job_id, None) + + +def test_edit_rejects_negative_clip_index(app_client): + client, job_id, _ = app_client + r = client.post( + "/api/edit", + headers={"X-Gemini-Key": "test"}, + json={"job_id": job_id, "clip_index": -1}, + ) + # 404 = our new route-entry guard. With the guard the route returns + # before any Gemini/FFmpeg work fires. 
+ assert r.status_code in (400, 404, 422), r.text + + +def test_effects_generate_rejects_negative_clip_index(app_client): + client, job_id, _ = app_client + r = client.post( + "/api/effects/generate", + headers={"X-Gemini-Key": "test"}, + json={"job_id": job_id, "clip_index": -1}, + ) + assert r.status_code in (400, 404, 422), r.text + + +def test_subtitle_rejects_negative_clip_index(app_client): + client, job_id, _ = app_client + r = client.post( + "/api/subtitle", + json={"job_id": job_id, "clip_index": -1, "language": "en"}, + ) + assert r.status_code in (400, 404, 422), r.text + + +def test_hook_rejects_negative_clip_index(app_client): + client, job_id, _ = app_client + r = client.post( + "/api/hook", + json={"job_id": job_id, "clip_index": -1, "text": "hi"}, + ) + assert r.status_code in (400, 404, 422), r.text + + +def test_clip_transcript_rejects_negative_clip_index(app_client): + client, job_id, _ = app_client + r = client.get(f"/api/clip/{job_id}/-1/transcript") + # FastAPI may parse path-param "-1" as int -1 (which the route's bounds + # check must reject) or refuse it outright with a 422; either way the + # route must NOT serve clips[-1] (the last clip's transcript). + assert r.status_code in (400, 404, 422), r.text From 6d1f0f2f0a6230a6a6ec9afa4956e361bdae5b64 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Wed, 20 May 2026 15:28:20 -0400 Subject: [PATCH 40/43] =?UTF-8?q?fix(short-form):=20Phase=205=20=E2=80=94?= =?UTF-8?q?=20default=20timeout=20on=20FFmpeg=20wrapper=20(deferred-C4)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex audit flagged that ``app.video.ffmpeg.run`` defaulted to ``timeout=None``, making any state-mutating ffmpeg call (`/api/merge`, ``/api/colorgrade``, ``/api/silencecut``, etc.) a DoS primitive: a hostile or corrupt input could pin a worker thread forever. The deferred C4 control was therefore exploitable today even with auth/rate-limit deferred. 
Fix: * ``DEFAULT_TIMEOUT`` (1800s, override via ``FFMPEG_TIMEOUT_SECONDS``) applied when caller passes ``timeout=None``. * ``DEFAULT_PROBE_TIMEOUT`` (30s, override via ``FFPROBE_TIMEOUT_SECONDS``) applied to ``probe_resolution``/``probe_duration``. * ``subprocess.TimeoutExpired`` wraps into ``FFmpegError(returncode=-1)`` so callers see a single exception type and the error message carries the configured timeout for triage. Tunables on env vars (defensible 30-min worst case for a 50-clip merge of 60s sources; production can lower with ``FFMPEG_TIMEOUT_SECONDS=600``). Tests: 6 new unit cases in tests/unit/test_ffmpeg_wrapper.py covering default application, explicit pass-through, timeout-to-FFmpegError wrapping, probe defaults, sanity of constant values. Suite: 181/181 (up from 175). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- backend/app/video/ffmpeg.py | 43 +++++++++--- backend/tests/unit/test_ffmpeg_wrapper.py | 82 +++++++++++++++++++++++ 2 files changed, 117 insertions(+), 8 deletions(-) create mode 100644 backend/tests/unit/test_ffmpeg_wrapper.py diff --git a/backend/app/video/ffmpeg.py b/backend/app/video/ffmpeg.py index 95a7a973..989aeea5 100644 --- a/backend/app/video/ffmpeg.py +++ b/backend/app/video/ffmpeg.py @@ -20,6 +20,15 @@ from typing import Iterable, List, Optional, Sequence +# Default subprocess timeouts. Codex flagged unbounded ffmpeg as a DoS +# vector (deferred-C4 exploitable today): any state-mutating route that +# accepts user-supplied media can keep ffmpeg busy forever. Override via +# FFMPEG_TIMEOUT_SECONDS / FFPROBE_TIMEOUT_SECONDS for unusual workloads +# (e.g. a 50-clip merge of 60s sources). 
+DEFAULT_TIMEOUT: float = float(os.environ.get("FFMPEG_TIMEOUT_SECONDS", "1800")) +DEFAULT_PROBE_TIMEOUT: float = float(os.environ.get("FFPROBE_TIMEOUT_SECONDS", "30")) + + class FFmpegError(RuntimeError): """Raised when an ffmpeg/ffprobe invocation exits non-zero.""" @@ -46,6 +55,10 @@ def run( Use this for one-shot ffmpeg invocations (encode, mux, probe). For multi-input filter graphs, build the args with ``build_filter_complex`` and pass them through here. + + Applies ``DEFAULT_TIMEOUT`` when caller passes ``timeout=None``. + Timeouts surface as ``FFmpegError(returncode=-1)`` so callers see a + single exception type. """ cmd = ["ffmpeg", *args] if not (args and args[0].endswith("ffprobe")) else list(args) if cmd[0] != "ffmpeg" and not cmd[0].endswith("ffprobe"): @@ -59,14 +72,26 @@ def run( full_env.setdefault("LANG", "C.UTF-8") full_env.setdefault("LC_ALL", "C.UTF-8") - result = subprocess.run( - cmd, - check=False, - stdout=subprocess.PIPE if capture_output else None, - stderr=subprocess.PIPE if capture_output else None, - env=full_env, - timeout=timeout, - ) + effective_timeout = DEFAULT_TIMEOUT if timeout is None else timeout + + try: + result = subprocess.run( + cmd, + check=False, + stdout=subprocess.PIPE if capture_output else None, + stderr=subprocess.PIPE if capture_output else None, + env=full_env, + timeout=effective_timeout, + ) + except subprocess.TimeoutExpired as exc: + # Bubble timeouts as FFmpegError so callers do not need to catch + # both exception types. -1 returncode signals the timeout. 
+ stderr_bytes = exc.stderr if isinstance(exc.stderr, bytes) else b"" + raise FFmpegError( + -1, + stderr_bytes + f"\n[timeout after {effective_timeout}s]".encode(), + cmd, + ) from exc if check and result.returncode != 0: raise FFmpegError(result.returncode, result.stderr or b"", cmd) @@ -89,6 +114,7 @@ def probe_resolution(video_path: str) -> tuple: stdout=subprocess.PIPE, stderr=subprocess.PIPE, env={**os.environ, "LANG": "C.UTF-8"}, + timeout=DEFAULT_PROBE_TIMEOUT, ) width, height = result.stdout.decode().strip().split("x") return int(width), int(height) @@ -107,6 +133,7 @@ def probe_duration(video_path: str) -> float: check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + timeout=DEFAULT_PROBE_TIMEOUT, ) return float(result.stdout.decode().strip()) diff --git a/backend/tests/unit/test_ffmpeg_wrapper.py b/backend/tests/unit/test_ffmpeg_wrapper.py new file mode 100644 index 00000000..add3a063 --- /dev/null +++ b/backend/tests/unit/test_ffmpeg_wrapper.py @@ -0,0 +1,82 @@ +"""Tests for app.video.ffmpeg wrapper default-timeout safety. + +Codex Phase 5 audit flagged that wrapper timeout defaulted to None, +allowing any state-mutating FFmpeg call to run forever on a hostile or +corrupt input (deferred C4 exploit). The wrapper now: + + * Applies DEFAULT_TIMEOUT (configurable via FFMPEG_TIMEOUT_SECONDS env) + when callers don't pass an explicit timeout. + * Wraps subprocess.TimeoutExpired into FFmpegError so callers see a + single exception type. + * Applies DEFAULT_PROBE_TIMEOUT to probe_resolution / probe_duration. + +These tests verify the contract by patching subprocess.run. 
+""" +from __future__ import annotations + +import subprocess +from unittest.mock import MagicMock, patch + +import pytest + +from app.video import ffmpeg as ffmpeg_wrapper +from app.video.ffmpeg import FFmpegError + + +def _fake_completed(returncode=0, stderr=b""): + cp = MagicMock(spec=subprocess.CompletedProcess) + cp.returncode = returncode + cp.stderr = stderr + cp.stdout = b"" + return cp + + +def test_run_applies_default_timeout_when_none_passed(): + with patch("app.video.ffmpeg.subprocess.run", return_value=_fake_completed()) as srun: + ffmpeg_wrapper.run(["-y", "-i", "a.mp4", "out.mp4"]) + assert srun.call_count == 1 + kwargs = srun.call_args.kwargs + assert kwargs["timeout"] is not None + assert kwargs["timeout"] == ffmpeg_wrapper.DEFAULT_TIMEOUT + + +def test_run_honors_explicit_timeout(): + with patch("app.video.ffmpeg.subprocess.run", return_value=_fake_completed()) as srun: + ffmpeg_wrapper.run(["-y", "-i", "a.mp4", "out.mp4"], timeout=12.5) + assert srun.call_args.kwargs["timeout"] == 12.5 + + +def test_run_wraps_timeout_into_ffmpeg_error(): + def fake_run(*args, **_kwargs): + raise subprocess.TimeoutExpired(cmd=args[0], timeout=1.0) + + with patch("app.video.ffmpeg.subprocess.run", side_effect=fake_run): + with pytest.raises(FFmpegError) as exc: + ffmpeg_wrapper.run(["-y", "-i", "a.mp4", "out.mp4"], timeout=1.0) + assert "timeout" in str(exc.value).lower() + # Returncode -1 by convention signals a timeout-induced failure. 
+ assert exc.value.returncode == -1 + + +def test_probe_resolution_has_default_probe_timeout(): + cp = _fake_completed() + cp.stdout = b"1920x1080\n" + with patch("app.video.ffmpeg.subprocess.run", return_value=cp) as srun: + ffmpeg_wrapper.probe_resolution("/tmp/v.mp4") + assert srun.call_args.kwargs.get("timeout") == ffmpeg_wrapper.DEFAULT_PROBE_TIMEOUT + + +def test_probe_duration_has_default_probe_timeout(): + cp = _fake_completed() + cp.stdout = b"42.5\n" + with patch("app.video.ffmpeg.subprocess.run", return_value=cp) as srun: + ffmpeg_wrapper.probe_duration("/tmp/v.mp4") + assert srun.call_args.kwargs.get("timeout") == ffmpeg_wrapper.DEFAULT_PROBE_TIMEOUT + + +def test_default_timeout_is_finite_and_positive(): + assert ffmpeg_wrapper.DEFAULT_TIMEOUT is not None + assert ffmpeg_wrapper.DEFAULT_TIMEOUT > 0 + assert ffmpeg_wrapper.DEFAULT_PROBE_TIMEOUT > 0 + # Probe should be quicker than full ffmpeg. + assert ffmpeg_wrapper.DEFAULT_PROBE_TIMEOUT < ffmpeg_wrapper.DEFAULT_TIMEOUT From a0210469ada593601bae8f17444c13682e82c29d Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Wed, 20 May 2026 15:32:39 -0400 Subject: [PATCH 41/43] =?UTF-8?q?fix(short-form):=20Phase=205=20=E2=80=94?= =?UTF-8?q?=20migrate=203=20modules=20to=20FFmpeg=20wrapper?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex audit (focus 2, 3 BLOCKERs) confirmed that ``/api/edit``, ``/api/subtitle``, and ``/api/hook`` were calling ``subprocess.run(['ffmpeg', ...])`` and ``subprocess.check_output(['ffprobe', ...])`` directly instead of going through ``app.video.ffmpeg``. 
That dodged: * the wrapper's UTF-8 locale setup (the ``ai_filters.py`` site even had a bytes-encoding workaround for the missing locale) * the Phase-5 B-2 default timeout (deferred-C4 DoS) * the uniform ``FFmpegError`` surface Migrated: * ``app/editing/ai_filters.py`` — ``apply_edits`` copy fallback, ``probe_resolution`` for input dimensions, main filter run. The bytes-encoding hack at L218-232 disappears because the wrapper sets ``LANG=C.UTF-8`` / ``LC_ALL=C.UTF-8``. * ``app/overlays/subtitles_render.py`` — ``burn_subtitles`` routes through ``ffmpeg_wrapper.run``; ``FFmpegError`` already carries the stderr payload so the manual decode/raise is gone. * ``app/overlays/hooks.py`` — both the input probe and the overlay burn-in now use the wrapper. Added regression test ``tests/unit/test_ffmpeg_wrapper_invariant.py`` that pins the three migrated files: any future commit that re-introduces a direct ``subprocess.*(['ffmpeg' / 'ffprobe' ...])`` call in those modules fails the suite (6 cases, 2 invariants × 3 files). Out of scope (documented in the test docstring): ``video/pipeline.py`` (per-frame Popen with stdin streaming), ``cli.py``, ``saas/pipeline.py``, and one remaining ffprobe call in ``main.py`` /api/effects/generate. Those need wrapper helpers we don't have yet — separate /gsd-secure-phase sweep. Suite: 187/187 (up from 181). Live curl against /api/subtitle and /api/hook returns the expected 404/422 responses post-restart. 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- backend/app/editing/ai_filters.py | 47 +++--------- backend/app/overlays/hooks.py | 30 +++----- backend/app/overlays/subtitles_render.py | 17 ++--- .../unit/test_ffmpeg_wrapper_invariant.py | 74 +++++++++++++++++++ 4 files changed, 103 insertions(+), 65 deletions(-) create mode 100644 backend/tests/unit/test_ffmpeg_wrapper_invariant.py diff --git a/backend/app/editing/ai_filters.py b/backend/app/editing/ai_filters.py index 045d2598..cd26c3b7 100644 --- a/backend/app/editing/ai_filters.py +++ b/backend/app/editing/ai_filters.py @@ -6,9 +6,10 @@ """ import os import json -import subprocess import time +from app.video import ffmpeg as ffmpeg_wrapper + from google import genai from google.genai import types @@ -163,16 +164,14 @@ def apply_edits(self, input_path, output_path, filter_data): if not filter_data or "filter_string" not in filter_data: print("⚠️ No filter string found. Copying original.") - subprocess.run(['ffmpeg', '-y', '-i', input_path, '-c', 'copy', output_path]) + ffmpeg_wrapper.run(['-y', '-i', input_path, '-c', 'copy', output_path]) return filter_string = filter_data["filter_string"] # Get input dimensions so we can enforce geometry (avoid broken aspect ratios). 
try: - probe_cmd = ['ffprobe', '-v', 'error', '-select_streams', 'v:0', '-show_entries', 'stream=width,height', '-of', 'csv=s=x:p=0', input_path] - res_out = subprocess.check_output(probe_cmd, env={**os.environ, "LANG": "C.UTF-8"}).decode().strip() - w, h = map(int, res_out.split('x')) + w, h = ffmpeg_wrapper.probe_resolution(input_path) except Exception as e: print(f"⚠️ Could not probe resolution: {e}") w, h = None, None @@ -198,38 +197,14 @@ def apply_edits(self, input_path, output_path, filter_data): print(f"🎬 Executing AI Filter: {filter_string}") - cmd = [ - 'ffmpeg', '-y', + # Wrapper sets LANG/LC_ALL=C.UTF-8 so the prior bytes-encoding hack + # is no longer needed; subprocess on Python 3 with a UTF-8 locale + # handles unicode args correctly. + ffmpeg_wrapper.run([ + '-y', '-i', input_path, '-vf', filter_string, '-c:v', 'libx264', '-preset', 'fast', '-crf', '22', '-c:a', 'copy', - output_path - ] - - # Use explicit environment with UTF-8 to avoid ascii errors in subprocess - env = os.environ.copy() - # On some minimal docker images, we need to ensure we use a UTF-8 locale - # Try C.UTF-8 first, fallback to en_US.UTF-8 if available, but C.UTF-8 is usually safer for minimal - env["LANG"] = "C.UTF-8" - env["LC_ALL"] = "C.UTF-8" - - try: - # We must encode arguments if filesystem is ascii but we have unicode chars - # But subprocess in Python 3 handles unicode args by encoding them with os.fsencode(). - # If sys.getfilesystemencoding() is ascii, this fails. - # We can't change fs encoding at runtime easily. - # Workaround: pass bytes directly? subprocess allows bytes in args. 
- - # Convert command elements to bytes assuming utf-8 if they are strings - cmd_bytes = [] - for arg in cmd: - if isinstance(arg, str): - cmd_bytes.append(arg.encode('utf-8')) - else: - cmd_bytes.append(arg) - - subprocess.run(cmd_bytes, check=True, env=env) - except subprocess.CalledProcessError as e: - print(f"❌ FFmpeg failed: {e}") - raise e + output_path, + ]) diff --git a/backend/app/overlays/hooks.py b/backend/app/overlays/hooks.py index 74f785eb..17c0416a 100644 --- a/backend/app/overlays/hooks.py +++ b/backend/app/overlays/hooks.py @@ -1,11 +1,12 @@ """Hook text overlays: PIL-rendered cards (PNG) burned onto video via FFmpeg.""" import os import textwrap -import subprocess import urllib.request from pathlib import Path from PIL import Image, ImageDraw, ImageFont, ImageFilter +from app.video import ffmpeg as ffmpeg_wrapper + FONT_URL = "https://github.com/googlefonts/noto-fonts/raw/main/hinted/ttf/NotoSerif/NotoSerif-Bold.ttf" @@ -198,12 +199,7 @@ def add_hook_to_video(video_path, text, output_path, position="top", font_scale= # 1. Probe video width to scale text properly try: - cmd = ['ffprobe', '-v', 'error', '-show_entries', 'stream=width,height', '-of', 'csv=s=x:p=0', video_path] - res = subprocess.check_output(cmd).decode().strip() - # Takes first stream if multiple - dims = res.split('\n')[0].split('x') - video_width = int(dims[0]) - video_height = int(dims[1]) + video_width, video_height = ffmpeg_wrapper.probe_resolution(video_path) except Exception as e: print(f"⚠️ FFprobe failed: {e}. Assuming 1080x1920") video_width = 1080 @@ -233,24 +229,22 @@ def add_hook_to_video(video_path, text, output_path, position="top", font_scale= # 4. 
FFmpeg Command print(f"🎬 Overlaying hook: '{text}' at {overlay_x},{overlay_y}") - - ffmpeg_cmd = [ - 'ffmpeg', '-y', + + ffmpeg_wrapper.run([ + '-y', '-i', video_path, '-i', img_path, '-filter_complex', f"[0:v][1:v]overlay={overlay_x}:{overlay_y}", '-c:a', 'copy', '-c:v', 'libx264', '-preset', 'fast', '-crf', '22', - output_path - ] - - subprocess.run(ffmpeg_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output_path, + ]) print(f"✅ Hook added to {output_path}") return True - - except subprocess.CalledProcessError as e: - print(f"❌ FFmpeg Error: {e.stderr.decode() if e.stderr else 'Unknown'}") - raise e + + except ffmpeg_wrapper.FFmpegError as e: + print(f"❌ FFmpeg Error: {e}") + raise except Exception as e: print(f"❌ Hook Gen Error: {e}") raise e diff --git a/backend/app/overlays/subtitles_render.py b/backend/app/overlays/subtitles_render.py index b034cb17..accdb8c2 100644 --- a/backend/app/overlays/subtitles_render.py +++ b/backend/app/overlays/subtitles_render.py @@ -1,6 +1,6 @@ """Subtitle burn-in: FFmpeg subtitles filter + ASS color/style conversion.""" -import subprocess +from app.video import ffmpeg as ffmpeg_wrapper def hex_to_ass_color(hex_color, opacity=1.0): @@ -74,20 +74,15 @@ def burn_subtitles(video_path, srt_path, output_path, alignment=2, fontsize=16, f"Bold=1" ) - cmd = [ - 'ffmpeg', '-y', + args = [ + '-y', '-i', video_path, '-vf', f"subtitles='{safe_srt_path}':force_style='{style_string}'", '-c:a', 'copy', '-c:v', 'libx264', '-preset', 'fast', '-crf', '23', - output_path + output_path, ] - print(f"🎬 Burning subtitles: {' '.join(cmd)}") - result = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) - - if result.returncode != 0: - print(f"❌ FFmpeg Subtitle Error: {result.stderr.decode()}") - raise Exception(f"FFmpeg failed: {result.stderr.decode()}") - + print(f"🎬 Burning subtitles: ffmpeg {' '.join(args)}") + ffmpeg_wrapper.run(args) return True diff --git a/backend/tests/unit/test_ffmpeg_wrapper_invariant.py 
b/backend/tests/unit/test_ffmpeg_wrapper_invariant.py new file mode 100644 index 00000000..849cbd3d --- /dev/null +++ b/backend/tests/unit/test_ffmpeg_wrapper_invariant.py @@ -0,0 +1,74 @@ +"""Regression test for the FFmpeg wrapper invariant. + +Codex Phase 5 audit (focus 2) confirmed that several modules called +``subprocess.run(['ffmpeg', ...])`` directly instead of going through +``app.video.ffmpeg``. The wrapper centralizes: + +* default timeouts (Phase 5 B-2 fix) +* UTF-8 locale setup +* uniform FFmpegError surfacing +* future logging / progress / audit hooks + +Phase 5 B-1 migrated the three Codex BLOCKERs: + +* ``app/editing/ai_filters.py`` +* ``app/overlays/subtitles_render.py`` +* ``app/overlays/hooks.py`` + +This test pins those migrations. New code in those packages must not +re-introduce direct ``subprocess`` calls referencing the ``ffmpeg`` +or ``ffprobe`` binaries. + +Out of scope (documented deferred): + +* ``app/video/pipeline.py`` — per-frame Popen with stdin streaming; + needs a wrapper redesign before migration. +* ``app/cli.py`` — legacy CLI entrypoint, used by ``/api/process`` + subprocess fan-out. +* ``app/saas/pipeline.py`` — separate product line. +* ``app/main.py`` — has one ffprobe probe in /api/effects/generate + pending a wrapper ``probe_metadata`` helper. +""" +from __future__ import annotations + +import re +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parents[2] +PROTECTED_FILES = [ + REPO_ROOT / "app" / "editing" / "ai_filters.py", + REPO_ROOT / "app" / "overlays" / "subtitles_render.py", + REPO_ROOT / "app" / "overlays" / "hooks.py", +] + +# Matches `subprocess.<anything>(... 'ffmpeg' or 'ffprobe' ...)` on a +# single source line. False-positives are fine — the protected files +# should have ZERO ffmpeg/ffprobe references via subprocess.*. 
+SUBPROCESS_FFMPEG_PATTERN = re.compile( + r"subprocess\.\w+\([^)]*['\"](?:ffmpeg|ffprobe)['\"]", + re.MULTILINE, +) + + +@pytest.mark.parametrize("path", PROTECTED_FILES, ids=lambda p: p.name) +def test_no_direct_ffmpeg_subprocess_in_protected_file(path): + """The file MUST NOT call subprocess.* with ffmpeg/ffprobe as a literal arg.""" + assert path.exists(), f"protected file missing: {path}" + src = path.read_text() + matches = SUBPROCESS_FFMPEG_PATTERN.findall(src) + assert not matches, ( + f"{path.name} has a direct ffmpeg/ffprobe subprocess call. " + "Migrate it to app.video.ffmpeg (ffmpeg_wrapper.run / .probe_resolution / " + "etc.) per project convention." + ) + + +@pytest.mark.parametrize("path", PROTECTED_FILES, ids=lambda p: p.name) +def test_protected_file_imports_wrapper(path): + """Every protected file must import ``ffmpeg_wrapper`` from app.video.""" + src = path.read_text() + assert "from app.video import ffmpeg as ffmpeg_wrapper" in src, ( + f"{path.name} must import the wrapper as ffmpeg_wrapper for uniform usage." + ) From 1761e6e0bd866b0628319d0e2d8da07bc9b779fe Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Wed, 20 May 2026 15:37:23 -0400 Subject: [PATCH 42/43] =?UTF-8?q?fix(short-form):=20Phase=205=20=E2=80=94?= =?UTF-8?q?=20allowlist=20LLM-produced=20FFmpeg=20filter=20strings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex audit (focus 1, BLOCKER) showed that /api/edit + auto_pipeline's apply_ai_edit executed Gemini-produced filter strings via FFmpeg ``-vf`` with only a comparison-operator cleanup pass. A malicious response like ``movie=/etc/passwd,scale=1:1`` would trigger filesystem reads through the ``movie`` filter. Same risk for ``amovie``, ``subtitles``, ``ass``, ``concat`` (file:= option), ``sendcmd``, ``asendcmd``. 
Fix: strict allowlist + explicit deny list in ``app/utils/filters.py``: * ``_ALLOWED_FILTERS`` enumerates safe filters used by the prompt (zoompan, eq, hue, curves, unsharp, …) plus pipeline essentials (scale, setsar, fps, format, fade, …) and a few common, safe visual primitives (vignette, drawbox, lutyuv/rgb, gblur, …). * ``_DISALLOWED_FILTERS`` explicitly bans movie/amovie/subtitles/ass/ concat/sendcmd/asendcmd as a defense-in-depth backstop. * Parser strips ``[label]`` brackets, splits the chain on both ``,`` and ``;`` (filter_complex), extracts the leading filter name, and fails closed on anything outside the allowlist. * ``UnsafeFilterError(ValueError)`` is raised on rejection. * ``VideoEditor.apply_edits`` calls the validator AFTER comparison- operator sanitization (since the post-sanitization form is what executes) and BEFORE the FFmpeg invocation. * ``/api/edit`` route handler surfaces ``UnsafeFilterError`` as a 400 with a frontend-friendly message instead of a generic 500. Tests: 14 new cases in ``tests/unit/test_filter_safety.py`` covering the Codex reproducer ``movie=/etc/passwd``, plus amovie/subtitles/ass/ concat, chain-position attacks (evil filter after legit one, evil after ``;``), unknown filters, bracket-label handling, whitespace tolerance, error-message specificity, and TypeError on non-strings. Suite: 208/208 (up from 187). 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- backend/app/editing/ai_filters.py | 25 +++-- backend/app/main.py | 9 ++ backend/app/utils/filters.py | 103 ++++++++++++++++++++ backend/tests/unit/test_filter_safety.py | 115 +++++++++++++++++++++++ 4 files changed, 244 insertions(+), 8 deletions(-) create mode 100644 backend/tests/unit/test_filter_safety.py diff --git a/backend/app/editing/ai_filters.py b/backend/app/editing/ai_filters.py index cd26c3b7..ea71be36 100644 --- a/backend/app/editing/ai_filters.py +++ b/backend/app/editing/ai_filters.py @@ -21,6 +21,8 @@ split_filter_chain as _split_filter_chain_fn, enforce_zoompan_output_size as _enforce_zoompan_output_size_fn, sanitize_filter_string as _sanitize_filter_string_fn, + validate_filter_string as _validate_filter_string_fn, + UnsafeFilterError, ) @@ -169,14 +171,9 @@ def apply_edits(self, input_path, output_path, filter_data): filter_string = filter_data["filter_string"] - # Get input dimensions so we can enforce geometry (avoid broken aspect ratios). - try: - w, h = ffmpeg_wrapper.probe_resolution(input_path) - except Exception as e: - print(f"⚠️ Could not probe resolution: {e}") - w, h = None, None - - # Sanitize common expression pitfalls (e.g., t<3 / on>=75) before executing FFmpeg. + # Sanitize common expression pitfalls (e.g., t<3 / on>=75) before + # validating: post-sanitization is the form that actually executes, + # so that's what must pass the allowlist. sanitized = _sanitize_filter_string_fn(filter_string) if sanitized != filter_string: print("🧼 Sanitized AI Filter (converted comparisons to lt/lte/gt/gte functions)") @@ -184,6 +181,18 @@ def apply_edits(self, input_path, output_path, filter_data): print(f"🧼 After: {sanitized}") filter_string = sanitized + # SAFETY: reject any LLM-produced filter that calls a non-allowlisted + # FFmpeg filter (movie/amovie/subtitles/concat/ass/...). Raises + # UnsafeFilterError, which the /api/edit route should surface as 400. 
+ _validate_filter_string_fn(filter_string) + + # Get input dimensions so we can enforce geometry (avoid broken aspect ratios). + try: + w, h = ffmpeg_wrapper.probe_resolution(input_path) + except Exception as e: + print(f"⚠️ Could not probe resolution: {e}") + w, h = None, None + # Enforce zoompan output size to preserve aspect ratio / resolution. if w and h: enforced = _enforce_zoompan_output_size_fn(filter_string, w, h) diff --git a/backend/app/main.py b/backend/app/main.py index 3fd2b817..8c1e0959 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -867,7 +867,16 @@ def run_edit(): "edit_plan": plan } + except HTTPException: + raise except Exception as e: + # UnsafeFilterError (from app.utils.filters) signals an LLM-produced + # filter that uses a disallowed FFmpeg filter — return 400, not 500, + # so the frontend can show "retry: AI returned an unsafe filter". + from app.utils.filters import UnsafeFilterError + if isinstance(e, UnsafeFilterError): + print(f"❌ Unsafe AI filter rejected: {e}") + raise HTTPException(status_code=400, detail=f"AI returned an unsafe filter: {e}") print(f"❌ Edit Error: {e}") raise HTTPException(status_code=500, detail=str(e)) diff --git a/backend/app/utils/filters.py b/backend/app/utils/filters.py index 9ea52b85..786c0363 100644 --- a/backend/app/utils/filters.py +++ b/backend/app/utils/filters.py @@ -57,3 +57,106 @@ def sanitize_filter_string(filter_string: str) -> str: for pat, repl in _COMPARISON_PATTERNS: s = pat.sub(repl, s) return s + + +# --------------------------------------------------------------------------- +# AI-filter safety allowlist (Codex Phase 5 focus 1 BLOCKER). +# +# /api/edit and auto_pipeline.apply_ai_edit pass LLM-produced filter strings +# to FFmpeg via ``-vf``. Without an allowlist, a malicious Gemini response +# (or a prompt-injected transcript) can include filters that read arbitrary +# files (``movie``, ``amovie``, ``subtitles``, ``ass``) or perform other +# filesystem side effects. 
+# +# Strategy: parse the chain on ``,`` and ``;``, strip ``[label]`` brackets, +# extract the leading filter name (chars up to ``=``/``:``), and confirm +# it's in ``_ALLOWED_FILTERS``. Fails closed for unknown filters. +# --------------------------------------------------------------------------- + + +class UnsafeFilterError(ValueError): + """Raised when an LLM-produced filter string contains a disallowed filter.""" + + +# Filters the AI-effect prompt instructs Gemini to use, plus essentials the +# pipeline injects (scale/setsar/fps/format) and a handful of common visual +# effects that are safe. +_ALLOWED_FILTERS = frozenset({ + # Geometry / colorspace (used by build_concat_args etc.) + "scale", "crop", "pad", "format", "setsar", "fps", + "hflip", "vflip", "transpose", "rotate", + # Time-based visual effects from the Gemini prompt + "zoompan", "fade", + # Color / tone (prompt-listed) + "eq", "hue", "curves", "vibrance", + "colorbalance", "colorchannelmixer", + "lutyuv", "lutrgb", "lut", + # Sharpen / blur + "unsharp", "smartblur", "gblur", "boxblur", + # Misc safe primitives + "vignette", "edgedetect", "noise", "drawbox", + # Aspect helpers for normalization + "null", "copy", "trim", "setpts", + # Audio safe primitives (filter graphs may include these for muxing). + "anull", "acopy", "aresample", "aformat", "atrim", "asetpts", +}) + + +# Explicit deny-list, in case an allowlist gap ever appears. These filter +# names imply filesystem reads / writes from a string parameter and must +# never be invocable via LLM-generated content. 
+_DISALLOWED_FILTERS = frozenset({ + "movie", "amovie", # arbitrary file read + "subtitles", "ass", # arbitrary subtitle file read + "concat", # accepts file path via :f= option + "sendcmd", "asendcmd", # external command channel +}) + + +_BRACKET_LABEL_RE = re.compile(r"\[[^\]]*\]") +_FILTER_NAME_RE = re.compile(r"^\s*([A-Za-z_][A-Za-z0-9_]*)") + + +def _iter_filter_nodes(filter_string: str): + """Yield (name, raw_node_text) for each filter in the chain. + + Handles both ``,`` (chain) and ``;`` (filter_complex chains-of-chains) + separators, and strips leading/trailing ``[label]`` brackets. + """ + # Split on ';' first, then on ',' (respecting single-quoted strings). + for sub_chain in filter_string.split(";"): + for node in split_filter_chain(sub_chain): + cleaned = _BRACKET_LABEL_RE.sub("", node).strip() + if not cleaned: + continue + m = _FILTER_NAME_RE.match(cleaned) + if not m: + continue + yield m.group(1).lower(), cleaned + + +def validate_filter_string(filter_string: str) -> None: + """Validate ``filter_string`` against the AI-filter allowlist. + + Raises ``UnsafeFilterError`` if any node uses a filter name outside + ``_ALLOWED_FILTERS`` (or explicitly listed in ``_DISALLOWED_FILTERS``). + An empty string is treated as "no filter" — allowed. + """ + if not isinstance(filter_string, str): + raise TypeError( + f"filter_string must be str, got {type(filter_string).__name__}" + ) + if not filter_string.strip(): + return + + for name, _node in _iter_filter_nodes(filter_string): + if name in _DISALLOWED_FILTERS: + raise UnsafeFilterError( + f"Disallowed FFmpeg filter '{name}' in AI-generated filter string " + f"(filesystem / side-effect risk)" + ) + if name not in _ALLOWED_FILTERS: + raise UnsafeFilterError( + f"FFmpeg filter '{name}' is not in the AI-filter allowlist. " + f"Add it to app.utils.filters._ALLOWED_FILTERS if it is safe." 
+ ) diff --git a/backend/tests/unit/test_filter_safety.py b/backend/tests/unit/test_filter_safety.py new file mode 100644 index 00000000..38261b02 --- /dev/null +++ b/backend/tests/unit/test_filter_safety.py @@ -0,0 +1,115 @@ +"""Tests for the AI-filter safety allowlist. + +Codex Phase 5 audit (focus 1, BLOCKER) found that ``/api/edit`` and +``auto_pipeline.apply_ai_edit`` executed LLM-produced ``filter_string`` +through FFmpeg ``-vf`` with only a regex-comparison-cleanup pass. A +malicious Gemini response like ``movie=/etc/passwd,scale=1:1`` would +exfiltrate / probe filesystem state. + +These tests pin a strict allowlist of FFmpeg filters allowed in +LLM-generated content. The filter parser strips bracket labels +(``[0:v]``...``[v0]``), splits the chain on commas + semicolons, and +matches each node's leading filter name against the allowlist. +""" +from __future__ import annotations + +import pytest + +from app.utils.filters import ( + UnsafeFilterError, + validate_filter_string, +) + + +# ---- happy paths ---------------------------------------------------------- + +@pytest.mark.parametrize( + "good", + [ + "zoompan=z='1.2':d=1:s=1080x1920:fps=30", + "eq=contrast=1.2:enable='between(t,0,3)'", + "hue=s=0:enable='between(t,10,12)'", + "unsharp=5:5:1.0", + "curves=preset=darker", + "zoompan=z='1.1':d=1:s=1080x1920,eq=contrast=1.2", # chain comma + # Bracket labels (filter_complex style) — parser must strip them. + "[0:v]zoompan=z='1.2':d=1:s=1080x1920[v0]", + "scale=1080:1920,setsar=1,fps=30,format=yuv420p", + # Whitespace tolerance + " eq = contrast = 1.2 ", + # Empty string is a no-op (no filter to apply) — allowed. 
+ "", + ], +) +def test_allowed_filters_pass(good): + validate_filter_string(good) # no raise + + +# ---- the actual attack from Codex reproducer ------------------------------ + +def test_blocks_movie_filter_with_arbitrary_path(): + """The exact attack Codex flagged: LLM returns a `movie=` node.""" + with pytest.raises(UnsafeFilterError) as exc: + validate_filter_string("movie=/etc/passwd,scale=1:1") + assert "movie" in str(exc.value).lower() + + +def test_blocks_amovie_filter(): + with pytest.raises(UnsafeFilterError): + validate_filter_string("amovie=/etc/passwd") + + +def test_blocks_subtitles_filter_for_arbitrary_path_read(): + with pytest.raises(UnsafeFilterError): + validate_filter_string("subtitles=/etc/shadow") + + +def test_blocks_ass_filter(): + with pytest.raises(UnsafeFilterError): + validate_filter_string("ass=/etc/secrets.ass") + + +def test_blocks_concat_filter(): + """concat can read arbitrary files when used as `concat=...:f=path`.""" + with pytest.raises(UnsafeFilterError): + validate_filter_string("concat=n=2:v=1:a=1") + + +# ---- chain-level attacks -------------------------------------------------- + +def test_blocks_when_evil_filter_after_legit_one(): + """LLM tries to slip an unsafe filter after a legit one.""" + with pytest.raises(UnsafeFilterError): + validate_filter_string("eq=contrast=1.2,movie=/etc/passwd") + + +def test_blocks_when_evil_filter_after_semicolon(): + """filter_complex chains-of-chains use `;`.""" + with pytest.raises(UnsafeFilterError): + validate_filter_string("zoompan=z='1.2':d=1;movie=/etc/passwd") + + +def test_blocks_unknown_filter_not_in_allowlist(): + """Even non-malicious unknown filters fail closed.""" + with pytest.raises(UnsafeFilterError): + validate_filter_string("brand_new_filter_that_does_not_exist=1") + + +def test_rejects_non_string_input(): + with pytest.raises(TypeError): + validate_filter_string(123) # type: ignore[arg-type] + with pytest.raises(TypeError): + validate_filter_string(None) # type: 
ignore[arg-type] + + +def test_validate_returns_none_on_success(): + # Pure validator: no return value, just raises on bad input. + assert validate_filter_string("eq=contrast=1.2") is None + + +def test_error_message_names_offending_filter(): + """Operators triaging an error need to know WHICH filter was blocked.""" + with pytest.raises(UnsafeFilterError) as exc: + validate_filter_string("eq=contrast=1.2,movie=/etc/passwd") + msg = str(exc.value).lower() + assert "movie" in msg From 0bad83295fc7b8e20ca10c246eb93b4eaf308900 Mon Sep 17 00:00:00 2001 From: vansteenbergenmatisse <vansteenbergenmatisse@gmail.com> Date: Wed, 20 May 2026 15:39:45 -0400 Subject: [PATCH 43/43] =?UTF-8?q?fix(short-form):=20Phase=205=20=E2=80=94?= =?UTF-8?q?=20per-job=20lock=20+=20atomic=20metadata=20writes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex audit (focus 4, 2 BLOCKERs) flagged concurrent mutations on ``jobs[]`` and ``metadata.json``: main.py:39 — jobs dict mutated by route handlers + executor threads with no synchronization main.py:1263 — _persist_clip_url's read-modify-write on metadata.json can lose updates when /api/colorgrade and /api/silencecut fire concurrently on the same clip Fix: * ``_JOB_LOCKS: Dict[str, threading.Lock]`` keyed by job_id, with a guard lock around the dict-of-locks. ``_job_lock(job_id)`` creates the lock lazily. * ``_atomic_write_json(path, data)`` writes via ``.tmp-{pid}-{tid}`` + ``os.replace`` so a crashed writer cannot leave a half-written metadata.json on disk. * ``_persist_clip_url`` now holds the per-job lock for the entire read-modify-write window AND writes via atomic-rename. * The inline metadata persistence in ``/api/subtitle`` (L1175-1190) uses the same lock + atomic write — Codex specifically called out that this path duplicated the unsynchronized pattern. Threading.Lock (not asyncio.Lock) because mutators are called from inside ``run_in_executor`` (sync code on worker threads). 
Lock is per-job_id, so unrelated jobs don't contend. Tests: 6 new cases in ``tests/unit/test_job_lock.py`` covering: - lock identity (same lock for same job, different for different) - lock type (threading.Lock acquire/release) - persist writes to memory + disk in one shot - atomic-write never leaves a partial file - 32 concurrent _persist_clip_url calls across 4 clips on the same job all land (vs. previous behavior where some updates would be lost to stale-read writers). Suite: 214/214 (up from 208). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- backend/app/main.py | 106 ++++++++++++------- backend/tests/unit/test_job_lock.py | 155 ++++++++++++++++++++++++++++ 2 files changed, 226 insertions(+), 35 deletions(-) create mode 100644 backend/tests/unit/test_job_lock.py diff --git a/backend/app/main.py b/backend/app/main.py index 8c1e0959..e97d933e 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -1179,25 +1179,23 @@ def run_burn(): print(f"❌ Subtitle Error: {e}") raise HTTPException(status_code=500, detail=str(e)) - # 3. Update Result and Metadata - # Update InMemory Jobs - if 0 <= req.clip_index < len(job['result']['clips']): - job['result']['clips'][req.clip_index]['video_url'] = f"/videos/{req.job_id}/{output_filename}" - - # Update Metadata on Disk (Persistence) - try: - if 0 <= req.clip_index < len(clips): - clips[req.clip_index]['video_url'] = f"/videos/{req.job_id}/{output_filename}" - # Update the main data structure - data['shorts'] = clips - - # Write back - with open(json_files[0], 'w') as f: - json.dump(data, f, indent=4) + # 3. Update Result and Metadata — under the per-job lock so concurrent + # /api/subtitle + /api/colorgrade calls on the same clip serialize. 
+ with _job_lock(req.job_id): + # Update InMemory Jobs + if 0 <= req.clip_index < len(job['result']['clips']): + job['result']['clips'][req.clip_index]['video_url'] = f"/videos/{req.job_id}/{output_filename}" + + # Update Metadata on Disk (Persistence) — atomic-rename write + try: + if 0 <= req.clip_index < len(clips): + clips[req.clip_index]['video_url'] = f"/videos/{req.job_id}/{output_filename}" + data['shorts'] = clips + _atomic_write_json(json_files[0], data) print(f"✅ Metadata updated with subtitled video for clip {req.clip_index}") - except Exception as e: - print(f"⚠️ Failed to update metadata.json: {e}") - # Non-critical, but good for persistence + except Exception as e: + print(f"⚠️ Failed to update metadata.json: {e}") + # Non-critical, but good for persistence return { "success": True, @@ -1263,26 +1261,64 @@ def _resolve_clip_input(job_id: str, clip_index: int, input_filename: Optional[s return job, output_dir, input_path, filename +# --------------------------------------------------------------------------- +# Per-job locks. Codex Phase 5 audit (focus 4) flagged that ``jobs[]`` + +# ``metadata.json`` were mutated by concurrent route handlers and background +# threads with no synchronization, so concurrent ``/api/colorgrade`` and +# ``/api/silencecut`` on the same clip could lose an update (read-modify-write +# race against the JSON file). The lock is granular per ``job_id`` so jobs +# don't contend with each other. +# +# Use ``threading.Lock`` (not asyncio.Lock) because the mutators are called +# from inside ``run_in_executor`` (sync code on a worker thread). 
+# --------------------------------------------------------------------------- +_JOB_LOCKS: Dict[str, threading.Lock] = {} +_JOB_LOCKS_GUARD = threading.Lock() + + +def _job_lock(job_id: str) -> threading.Lock: + """Return the per-job lock, creating it lazily.""" + with _JOB_LOCKS_GUARD: + lock = _JOB_LOCKS.get(job_id) + if lock is None: + lock = threading.Lock() + _JOB_LOCKS[job_id] = lock + return lock + + +def _atomic_write_json(path: str, data: dict) -> None: + """Write ``data`` to ``path`` atomically via tmp file + os.replace.""" + tmp_path = f"{path}.tmp-{os.getpid()}-{threading.get_ident()}" + with open(tmp_path, "w") as f: + json.dump(data, f, indent=4) + os.replace(tmp_path, path) + + def _persist_clip_url(job_id: str, clip_index: int, new_filename: str) -> None: - """Write the new clip URL back to in-memory jobs[] and to metadata.json.""" + """Write the new clip URL back to in-memory jobs[] and to metadata.json. + + Holds the per-job lock for the entire read-modify-write so concurrent + callers serialize; writes the JSON via atomic-rename so a crashed + process can never leave a half-written metadata.json on disk. 
+ """ new_url = f"/videos/{job_id}/{new_filename}" - job = jobs.get(job_id) - if job and 0 <= clip_index < len(job['result']['clips']): - job['result']['clips'][clip_index]['video_url'] = new_url + with _job_lock(job_id): + job = jobs.get(job_id) + if job and 0 <= clip_index < len(job['result']['clips']): + job['result']['clips'][clip_index]['video_url'] = new_url - try: - json_files = glob.glob(os.path.join(OUTPUT_DIR, job_id, "*_metadata.json")) - if json_files: - with open(json_files[0], 'r') as f: - data = json.load(f) - clips = data.get('shorts', []) - if 0 <= clip_index < len(clips): - clips[clip_index]['video_url'] = new_url - data['shorts'] = clips - with open(json_files[0], 'w') as f: - json.dump(data, f, indent=4) - except Exception as exc: - print(f"⚠️ Failed to update metadata.json for clip url: {exc}") + try: + json_files = glob.glob(os.path.join(OUTPUT_DIR, job_id, "*_metadata.json")) + if json_files: + with open(json_files[0], 'r') as f: + data = json.load(f) + clips = data.get('shorts', []) + if 0 <= clip_index < len(clips): + clips[clip_index]['video_url'] = new_url + data['shorts'] = clips + _atomic_write_json(json_files[0], data) + except Exception as exc: + print(f"⚠️ Failed to update metadata.json for clip url: {exc}") @app.post("/api/colorgrade") diff --git a/backend/tests/unit/test_job_lock.py b/backend/tests/unit/test_job_lock.py new file mode 100644 index 00000000..feff80fd --- /dev/null +++ b/backend/tests/unit/test_job_lock.py @@ -0,0 +1,155 @@ +"""Tests for the Phase 5 per-job lock + atomic metadata writes. + +Codex Phase 5 audit (focus 4, 2 BLOCKERs) flagged that ``jobs[]`` and +``metadata.json`` were mutated by route handlers and executor threads +without synchronization. Concurrent ``/api/colorgrade`` and +``/api/silencecut`` on the same clip could lose an update because both +read the same metadata.json, modify their slot, and write back — +classic read-modify-write race. 
+ +These tests cover: + +* per-job lock identity (same lock for same job, different locks for + different jobs) +* lock is a ``threading.Lock`` so it composes with executor workers +* ``_persist_clip_url`` acquires + releases the lock around the + read-modify-write window +* concurrent ``_persist_clip_url`` calls on the same job serialize + (the final on-disk JSON reflects ALL updates, not just the last + writer's view of the file) +* ``_atomic_write_json`` writes via tmp + rename, so a partial state + is never visible to a concurrent reader. +""" +from __future__ import annotations + +import json +import threading +from concurrent.futures import ThreadPoolExecutor +from pathlib import Path + +import pytest + + +@pytest.fixture +def seeded_job(tmp_path, monkeypatch): + """Seed ``jobs[job_id]`` + an on-disk metadata.json so _persist_clip_url + has something real to mutate.""" + monkeypatch.chdir(tmp_path) + (tmp_path / "output").mkdir(exist_ok=True) + + from app.main import jobs + + job_id = "lock-test-job" + job_dir = tmp_path / "output" / job_id + job_dir.mkdir(parents=True, exist_ok=True) + metadata = { + "shorts": [ + {"video_url": f"/videos/{job_id}/_clip_{i}.mp4"} for i in range(4) + ], + } + (job_dir / "test_metadata.json").write_text(json.dumps(metadata)) + + jobs[job_id] = { + "id": job_id, + "status": "completed", + "result": { + "clips": [ + {"video_url": f"/videos/{job_id}/_clip_{i}.mp4"} for i in range(4) + ], + }, + } + + yield job_id, job_dir + jobs.pop(job_id, None) + + +def test_job_lock_returns_same_instance_for_same_id(): + from app.main import _job_lock + lock_a = _job_lock("identity-test") + lock_b = _job_lock("identity-test") + assert lock_a is lock_b + + +def test_job_lock_returns_different_instances_for_different_ids(): + from app.main import _job_lock + lock_a = _job_lock("job-A-distinct") + lock_b = _job_lock("job-B-distinct") + assert lock_a is not lock_b + + +def test_job_lock_is_threading_lock(): + from app.main import _job_lock + lock = 
_job_lock("type-test") + # threading.Lock instances expose acquire/release + are usable in `with`. + assert hasattr(lock, "acquire") + assert hasattr(lock, "release") + # Confirm it actually behaves like a lock. + assert lock.acquire(blocking=False) + lock.release() + + +def test_persist_clip_url_writes_in_memory_and_disk(seeded_job): + job_id, job_dir = seeded_job + from app.main import _persist_clip_url, jobs + + _persist_clip_url(job_id, 0, "graded_clip_0.mp4") + + assert jobs[job_id]["result"]["clips"][0]["video_url"] == f"/videos/{job_id}/graded_clip_0.mp4" + on_disk = json.loads((job_dir / "test_metadata.json").read_text()) + assert on_disk["shorts"][0]["video_url"] == f"/videos/{job_id}/graded_clip_0.mp4" + + +def test_atomic_write_json_never_leaves_partial_file(tmp_path): + """Mid-crash should leave OLD content visible, never a partial file.""" + from app.main import _atomic_write_json + + target = tmp_path / "meta.json" + target.write_text('{"shorts": [{"video_url": "old"}]}') + + # Simulate a crashed writer: write the new content then fail. Atomic + # rename means either old or new is visible — never half-written. + new_data = {"shorts": [{"video_url": "new"}]} + _atomic_write_json(str(target), new_data) + assert json.loads(target.read_text())["shorts"][0]["video_url"] == "new" + + # No leftover tmp files in dir. + leftovers = [p for p in tmp_path.iterdir() if p.name != "meta.json"] + assert leftovers == [] + + +def test_concurrent_persist_clip_url_serializes_writes(seeded_job): + """N parallel updaters on the same job MUST all land in metadata.json. + + Without a lock, two threads race on the read-modify-write: T1 reads + state v0, T2 reads state v0, T1 writes (v0 + my update), T2 writes + (v0 + my update) — T1's update is lost. With the per-job lock, all + updates serialize and the final on-disk JSON has every clip's URL + updated. 
+ """ + job_id, job_dir = seeded_job + from app.main import _persist_clip_url + + # 4 clips × 8 writers each → 32 concurrent updates across the same + # metadata.json. Each updater mutates its own clip slot. + writers_per_clip = 8 + with ThreadPoolExecutor(max_workers=8) as pool: + futures = [] + for clip_idx in range(4): + for n in range(writers_per_clip): + futures.append(pool.submit( + _persist_clip_url, job_id, clip_idx, + f"graded_clip_{clip_idx}_round_{n}.mp4", + )) + for f in futures: + f.result() + + # Final state: every clip's URL must reflect SOME concrete writer + # result, not the original placeholder. (If the lock were missing, + # at least one clip would still show its original URL because its + # update was clobbered by a stale-read writer for a DIFFERENT clip.) + on_disk = json.loads((job_dir / "test_metadata.json").read_text()) + for clip_idx in range(4): + url = on_disk["shorts"][clip_idx]["video_url"] + assert url.startswith(f"/videos/{job_id}/graded_clip_{clip_idx}_round_"), ( + f"clip {clip_idx} lost its update (likely a race): {url}" + )