Skip to content

Commit 040ed61

Browse files
authored
Add exclude_dirs support for translation system (#894)
1 parent 4614ef7 commit 040ed61

File tree

5 files changed

+77
-5
lines changed

5 files changed

+77
-5
lines changed

TRANSLATING.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,19 @@ flowchart TD
190190
- The system uses the last merged translation PR's commit SHA as the baseline for change detection
191191
- **Translation PRs should be merged promptly** to avoid them stacking up or becoming outdated
192192

193+
### Excluding Directories from Translation
194+
195+
To prevent specific directories under `docs/en/docs/` from being translated, add them to `_scripts/translate/translate_config.yml`:
196+
197+
```yaml
198+
exclude_dirs:
199+
- example_docs_set
200+
- archive
201+
```
202+
203+
Excluded directories are skipped during sync (for both new-file and outdated-file detection), and existing translations inside them are not flagged as orphaned.
204+
Pages in excluded directories still appear on translated sites, showing the English content with a "Translation in Progress" admonition.
205+
193206
---
194207
195208
## Reviewing Translation PRs
@@ -413,6 +426,7 @@ _scripts/
413426
│ ├── __init__.py # Package marker
414427
│ ├── __main__.py # Entry point
415428
│ ├── config.py # Constants and configuration
429+
│ ├── translate_config.yml # Translation scope config (exclude dirs, etc.)
416430
│ ├── models.py # Data structures
417431
│ ├── paths.py # Path utilities
418432
│ ├── prompts.py # Prompt loading

_scripts/translate/config.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,18 @@
33
from __future__ import annotations
44

55
import os
6+
from functools import lru_cache
67
from pathlib import Path
78

9+
import yaml
810
from rich.console import Console
911

1012
# Paths (resolved relative to this file: _scripts/translate/config.py)
1113
REPO_ROOT = Path(__file__).parent.parent.parent
1214
DOCS_ROOT = REPO_ROOT / "docs"
1315
EN_DOCS = DOCS_ROOT / "en" / "docs"
1416
SCRIPTS_DIR = REPO_ROOT / "_scripts"
17+
TRANSLATE_CONFIG = Path(__file__).parent / "translate_config.yml"
1518

1619
# Claude API settings
1720
MODEL = "claude-sonnet-4-6"
@@ -28,6 +31,23 @@
2831
DEFAULT_PARALLEL = 10
2932
PRIORITY_DIRS = ["hello_nextflow", "hello_nf-core", "nf4_science", "envsetup"]
3033

34+
35+
@lru_cache
def get_translate_config() -> dict:
    """Read and parse ``translate_config.yml``.

    Returns:
        The parsed top-level mapping. An empty dict is returned when the
        file does not exist or when its top-level YAML value is not a
        mapping (for example an empty file, which parses to ``None``).

    The result is memoized by ``lru_cache``, so every call returns the same
    dict object; callers should treat it as read-only.
    """
    if TRANSLATE_CONFIG.exists():
        raw_text = TRANSLATE_CONFIG.read_text(encoding="utf-8")
        parsed = yaml.safe_load(raw_text)
        if isinstance(parsed, dict):
            return parsed
    # Missing file or non-mapping content: behave as "no configuration".
    return {}
42+
43+
44+
def get_exclude_dirs() -> set[str]:
    """Return the set of directory names to exclude from translation.

    Reads the ``exclude_dirs`` key from the translation config and
    normalizes it so that each entry can be compared against a single
    path component:

    * a list is used as-is; a single string is treated as a one-item list;
      any other value (including a missing key or ``null``) yields an
      empty set;
    * non-string entries are ignored, since they can never equal a path
      component and unhashable ones (e.g. nested lists) would otherwise
      raise ``TypeError`` when building the set;
    * surrounding whitespace and leading/trailing slashes are stripped, so
      ``"archive/"`` is treated the same as ``"archive"``;
    * entries that are empty after stripping are dropped.

    Returns:
        A set of normalized directory names; empty when nothing is excluded.
    """
    raw = get_translate_config().get("exclude_dirs", [])
    if isinstance(raw, str):
        # Tolerate the scalar form ``exclude_dirs: archive``.
        raw = [raw]
    elif not isinstance(raw, list):
        return set()

    names: set[str] = set()
    for entry in raw:
        if not isinstance(entry, str):
            continue
        name = entry.strip().strip("/")
        if name:
            names.add(name)
    return names
49+
50+
3151
# Comment styles by language (used for code block post-processing)
3252
HASH_COMMENT_LANGS = {"python", "py", "sh", "bash", "dockerfile", "yaml", "yml", "toml"}
3353
SLASH_COMMENT_LANGS = {"console"} # Note: json has no comments

_scripts/translate/git_utils.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,14 @@
99

1010
import git
1111

12-
from .config import DOCS_ROOT, REPO_ROOT, SCRIPTS_DIR, ConfigError
12+
from .config import (
13+
DOCS_ROOT,
14+
EN_DOCS,
15+
REPO_ROOT,
16+
SCRIPTS_DIR,
17+
ConfigError,
18+
get_exclude_dirs,
19+
)
1320
from .models import TranslationFile
1421
from .paths import en_to_lang_path, iter_en_docs, lang_to_en_path
1522

@@ -217,11 +224,24 @@ def get_missing_files(lang: str) -> list[TranslationFile]:
217224

218225

219226
def get_orphaned_files(lang: str) -> list[Path]:
    """Find translation files that have no English source.

    Every ``*.md`` file under ``docs/<lang>/docs`` is checked. A file is
    reported when the English path it maps to does not exist, unless the
    first component of its path (relative to the language docs root) is
    one of the configured excluded directories; those are skipped because
    their English sources are intentionally left out of translation.

    Args:
        lang: Language code naming the directory under the docs root.

    Returns:
        Paths of orphaned translation files; an empty list when the
        language docs directory does not exist.
    """
    lang_docs = DOCS_ROOT / lang / "docs"
    if not lang_docs.exists():
        return []

    excluded = get_exclude_dirs()

    def in_excluded_dir(md_file: Path) -> bool:
        # Only the top-level component under the language docs root counts.
        parts = md_file.relative_to(lang_docs).parts
        return bool(parts) and parts[0] in excluded

    return [
        md_file
        for md_file in lang_docs.rglob("*.md")
        if not in_excluded_dir(md_file)
        and not lang_to_en_path(md_file, lang).exists()
    ]
225245

226246

227247
def get_renamed_files(baseline: str) -> dict[Path, Path]:

_scripts/translate/paths.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
import yaml
99

10-
from .config import DOCS_ROOT, EN_DOCS, PRIORITY_DIRS, ConfigError
10+
from .config import DOCS_ROOT, EN_DOCS, PRIORITY_DIRS, ConfigError, get_exclude_dirs
1111

1212

1313
@lru_cache
@@ -41,17 +41,27 @@ def lang_to_en_path(lang_path: Path, lang: str) -> Path:
4141
return EN_DOCS / lang_path.relative_to(DOCS_ROOT / lang / "docs")
4242

4343

44+
def _is_excluded(path: Path, exclude_dirs: set[str]) -> bool:
    """Tell whether ``path`` lies under an excluded top-level directory.

    Args:
        path: A path located under ``EN_DOCS``; ``Path.relative_to`` raises
            ``ValueError`` otherwise (only reached when ``exclude_dirs`` is
            non-empty).
        exclude_dirs: Names compared against the first component of
            ``path`` relative to ``EN_DOCS``.

    Returns:
        ``True`` when the first relative component is in ``exclude_dirs``;
        ``False`` otherwise, including when ``exclude_dirs`` is empty.
    """
    if exclude_dirs:
        # At most one element: the top-level component under EN_DOCS.
        top_level = path.relative_to(EN_DOCS).parts[:1]
        return bool(top_level) and top_level[0] in exclude_dirs
    return False
50+
51+
4452
def iter_en_docs() -> list[Path]:
4553
"""List all English docs in priority order.
4654
4755
Priority directories are listed first, then remaining files.
4856
Within each group, files are sorted alphabetically.
57+
Directories listed in translate_config.yml ``exclude_dirs`` are skipped.
4958
"""
59+
exclude_dirs = get_exclude_dirs()
5060
paths: list[Path] = []
5161
seen: set[Path] = set()
5262

5363
def add(p: Path) -> None:
54-
if p not in seen:
64+
if p not in seen and not _is_excluded(p, exclude_dirs):
5565
paths.append(p)
5666
seen.add(p)
5767

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Translation configuration
2+
# Controls which English documentation files are included in automatic translation.
3+
4+
# Directories under docs/en/docs/ to exclude from translation.
5+
# Files in these directories will not be translated, and existing
6+
# translations will not be flagged as orphaned.
7+
# Example: ["side_quests", "archive"]
8+
exclude_dirs: []

0 commit comments

Comments
 (0)