diff --git a/recce/cli.py b/recce/cli.py
index d72c797b2..b839079a2 100644
--- a/recce/cli.py
+++ b/recce/cli.py
@@ -134,7 +134,12 @@ def _add_options(func):
dbt_related_options = [
- click.option("--target", "-t", help="Which target to load for the given profile.", type=click.STRING),
+ click.option(
+ "--target",
+ "-t",
+ help="Which target to load for the given profile.",
+ type=click.STRING,
+ ),
click.option("--profile", help="Which existing profile to load.", type=click.STRING),
click.option(
"--project-dir",
@@ -152,8 +157,18 @@ def _add_options(func):
sqlmesh_related_options = [
click.option("--sqlmesh", is_flag=True, help="Use SQLMesh ", hidden=True),
- click.option("--sqlmesh-envs", is_flag=False, help="SQLMesh envs to compare. SOURCE:TARGET", hidden=True),
- click.option("--sqlmesh-config", is_flag=False, help="SQLMesh config name to use", hidden=True),
+ click.option(
+ "--sqlmesh-envs",
+ is_flag=False,
+ help="SQLMesh envs to compare. SOURCE:TARGET",
+ hidden=True,
+ ),
+ click.option(
+ "--sqlmesh-config",
+ is_flag=False,
+ help="SQLMesh config name to use",
+ hidden=True,
+ ),
]
recce_options = [
@@ -165,7 +180,11 @@ def _add_options(func):
show_default=True,
),
click.option(
- "--error-log", help="Path to the error log file.", type=click.Path(), default=RECCE_ERROR_LOG_FILE, hidden=True
+ "--error-log",
+ help="Path to the error log file.",
+ type=click.Path(),
+ default=RECCE_ERROR_LOG_FILE,
+ hidden=True,
),
click.option("--debug", is_flag=True, help="Enable debug mode.", hidden=True),
]
@@ -173,7 +192,10 @@ def _add_options(func):
recce_cloud_options = [
click.option("--cloud", is_flag=True, help="Fetch the state file from cloud."),
click.option(
- "--cloud-token", help="The GitHub token used by Recce Cloud.", type=click.STRING, envvar="GITHUB_TOKEN"
+ "--cloud-token",
+ help="The GitHub token used by Recce Cloud.",
+ type=click.STRING,
+ envvar="GITHUB_TOKEN",
),
click.option(
"--state-file-host",
@@ -234,7 +256,10 @@ def _add_options(func):
"--session-id",
help="The session ID triggers this instance.",
type=click.STRING,
- envvar=["RECCE_SESSION_ID", "RECCE_SNAPSHOT_ID"], # Backward compatibility with RECCE_SNAPSHOT_ID
+ envvar=[
+ "RECCE_SESSION_ID",
+ "RECCE_SNAPSHOT_ID",
+ ], # Backward compatibility with RECCE_SNAPSHOT_ID
hidden=True,
),
]
@@ -271,7 +296,10 @@ def cli(ctx, **kwargs):
error_console.print(
f"[[yellow]Update Available[/yellow]] A new version of Recce {__latest_version__} is available.",
)
- error_console.print("Please update using the command: 'pip install --upgrade recce'.", end="\n\n")
+ error_console.print(
+ "Please update using the command: 'pip install --upgrade recce'.",
+ end="\n\n",
+ )
@cli.command(cls=TrackCommand)
@@ -400,7 +428,10 @@ def init(cache_db, **kwargs):
target_base_path.mkdir(parents=True, exist_ok=True)
# Download current session artifacts
- for artifact_key, filename in [("manifest_url", "manifest.json"), ("catalog_url", "catalog.json")]:
+ for artifact_key, filename in [
+ ("manifest_url", "manifest.json"),
+ ("catalog_url", "catalog.json"),
+ ]:
url = download_urls.get(artifact_key)
if url:
try:
@@ -423,7 +454,10 @@ def init(cache_db, **kwargs):
except RecceCloudException as e:
console.print(f" [[yellow]Warning[/yellow]] Failed to get base session URLs: {e}")
base_download_urls = {}
- for artifact_key, filename in [("manifest_url", "manifest.json"), ("catalog_url", "catalog.json")]:
+ for artifact_key, filename in [
+ ("manifest_url", "manifest.json"),
+ ("catalog_url", "catalog.json"),
+ ]:
url = base_download_urls.get(artifact_key)
if url:
try:
@@ -776,7 +810,12 @@ def _to_dict(artifact):
else artifact
)
- for env_name, manifest, catalog, cross_catalog in envs_to_emit:
+ for (
+ env_name,
+ manifest,
+ catalog,
+ cross_catalog,
+ ) in envs_to_emit:
if manifest is None:
continue
manifest_dict = manifest.to_dict() if hasattr(manifest, "to_dict") else manifest
@@ -1087,12 +1126,24 @@ def _split_comma_separated(ctx, param, value):
help="Comma-separated list of primary key columns.",
callback=_split_comma_separated,
)
-@click.option("--keep-shape", is_flag=True, help="Keep unchanged columns. Otherwise, unchanged columns are hidden.")
@click.option(
- "--keep-equal", is_flag=True, help='Keep values that are equal. Otherwise, equal values are shown as "-".'
+ "--keep-shape",
+ is_flag=True,
+ help="Keep unchanged columns. Otherwise, unchanged columns are hidden.",
+)
+@click.option(
+ "--keep-equal",
+ is_flag=True,
+ help='Keep values that are equal. Otherwise, equal values are shown as "-".',
)
@add_options(dbt_related_options)
-def diff(sql, primary_keys: List[str] = None, keep_shape: bool = False, keep_equal: bool = False, **kwargs):
+def diff(
+ sql,
+ primary_keys: List[str] = None,
+ keep_shape: bool = False,
+ keep_equal: bool = False,
+ **kwargs,
+):
"""
Run queries on base and current environments and diff the results
@@ -1114,7 +1165,10 @@ def diff(sql, primary_keys: List[str] = None, keep_shape: bool = False, keep_equ
before_aligned, after_aligned = before.align(after)
diff = before_aligned.compare(
- after_aligned, result_names=("base", "current"), keep_equal=keep_equal, keep_shape=keep_shape
+ after_aligned,
+ result_names=("base", "current"),
+ keep_equal=keep_equal,
+ keep_shape=keep_shape,
)
print(diff.to_string(na_rep="-") if not diff.empty else "no changes")
@@ -1123,7 +1177,13 @@ def diff(sql, primary_keys: List[str] = None, keep_shape: bool = False, keep_equ
@click.argument("state_file", required=False)
@click.option("--host", default="localhost", show_default=True, help="The host to bind to.")
@click.option("--port", default=8000, show_default=True, help="The port to bind to.", type=int)
-@click.option("--lifetime", default=0, show_default=True, help="The lifetime of the server in seconds.", type=int)
+@click.option(
+ "--lifetime",
+ default=0,
+ show_default=True,
+ help="The lifetime of the server in seconds.",
+ type=int,
+)
@click.option(
"--idle-timeout",
default=0,
@@ -1366,10 +1426,15 @@ def server(host, port, lifetime, idle_timeout=0, state_file=None, **kwargs):
envvar="GITHUB_HEAD_REF",
)
@click.option(
- "--git-base-branch", help="The git branch of the base environment.", type=click.STRING, envvar="GITHUB_BASE_REF"
+ "--git-base-branch",
+ help="The git branch of the base environment.",
+ type=click.STRING,
+ envvar="GITHUB_BASE_REF",
)
@click.option(
- "--github-pull-request-url", help="The github pull request url to use for the lineage.", type=click.STRING
+ "--github-pull-request-url",
+ help="The github pull request url to use for the lineage.",
+ type=click.STRING,
)
@add_options(dbt_related_options)
@add_options(sqlmesh_related_options)
@@ -1448,7 +1513,6 @@ def run(output, **kwargs):
# Verify the output state file path
try:
if os.path.isdir(output) or output.endswith("/"):
-
output_dir = Path(output)
# Create the directory if not exists
output_dir.mkdir(parents=True, exist_ok=True)
@@ -1507,7 +1571,10 @@ def summary(state_file, **kwargs):
)
state_loader = create_state_loader(
- review_mode=True, cloud_mode=cloud_mode, state_file=state_file, cloud_options=cloud_options
+ review_mode=True,
+ cloud_mode=cloud_mode,
+ state_file=state_file,
+ cloud_options=cloud_options,
)
state_loader.load()
@@ -1566,7 +1633,12 @@ def cloud(**kwargs):
@cloud.command(cls=TrackCommand)
-@click.option("--cloud-token", help="The GitHub token used by Recce Cloud.", type=click.STRING, envvar="GITHUB_TOKEN")
+@click.option(
+ "--cloud-token",
+ help="The GitHub token used by Recce Cloud.",
+ type=click.STRING,
+ envvar="GITHUB_TOKEN",
+)
@click.option(
"--state-file-host",
help="The host to fetch the state file from.",
@@ -1582,7 +1654,12 @@ def cloud(**kwargs):
type=click.STRING,
envvar="RECCE_STATE_PASSWORD",
)
-@click.option("--force", "-f", help="Bypasses the confirmation prompt. Purge the state file directly.", is_flag=True)
+@click.option(
+ "--force",
+ "-f",
+ help="Bypasses the confirmation prompt. Purge the state file directly.",
+ is_flag=True,
+)
@add_options(recce_options)
def purge(**kwargs):
"""
@@ -1605,7 +1682,10 @@ def purge(**kwargs):
try:
console.rule("Check Recce State from Cloud")
state_loader = create_state_loader(
- review_mode=False, cloud_mode=True, state_file=None, cloud_options=cloud_options
+ review_mode=False,
+ cloud_mode=True,
+ state_file=None,
+ cloud_options=cloud_options,
)
state_loader.load()
except Exception:
@@ -1653,7 +1733,12 @@ def purge(**kwargs):
@cloud.command(cls=TrackCommand)
@click.argument("state_file", type=click.Path(exists=True))
-@click.option("--cloud-token", help="The GitHub token used by Recce Cloud.", type=click.STRING, envvar="GITHUB_TOKEN")
+@click.option(
+ "--cloud-token",
+ help="The GitHub token used by Recce Cloud.",
+ type=click.STRING,
+ envvar="GITHUB_TOKEN",
+)
@click.option(
"--state-file-host",
help="The host to fetch the state file from.",
@@ -1689,7 +1774,10 @@ def upload(state_file, **kwargs):
# load local state
state_loader = create_state_loader(
- review_mode=False, cloud_mode=False, state_file=state_file, cloud_options=cloud_options
+ review_mode=False,
+ cloud_mode=False,
+ state_file=state_file,
+ cloud_options=cloud_options,
)
state_loader.load()
@@ -1724,7 +1812,12 @@ def upload(state_file, **kwargs):
default=DEFAULT_RECCE_STATE_FILE,
show_default=True,
)
-@click.option("--cloud-token", help="The GitHub token used by Recce Cloud.", type=click.STRING, envvar="GITHUB_TOKEN")
+@click.option(
+ "--cloud-token",
+ help="The GitHub token used by Recce Cloud.",
+ type=click.STRING,
+ envvar="GITHUB_TOKEN",
+)
@click.option(
"--state-file-host",
help="The host to fetch the state file from.",
@@ -1778,7 +1871,12 @@ def download(**kwargs):
@cloud.command(cls=TrackCommand)
-@click.option("--cloud-token", help="The GitHub token used by Recce Cloud.", type=click.STRING, envvar="GITHUB_TOKEN")
+@click.option(
+ "--cloud-token",
+ help="The GitHub token used by Recce Cloud.",
+ type=click.STRING,
+ envvar="GITHUB_TOKEN",
+)
@click.option(
"--branch",
"-b",
@@ -1825,7 +1923,11 @@ def upload_artifacts(**kwargs):
try:
rc = upload_dbt_artifacts(
- target_path, branch=branch, token=cloud_token, password=password, debug=kwargs.get("debug", False)
+ target_path,
+ branch=branch,
+ token=cloud_token,
+ password=password,
+ debug=kwargs.get("debug", False),
)
console.rule("Uploaded Successfully")
console.print(
@@ -1878,7 +1980,12 @@ def _download_artifacts(branch, cloud_token, console, kwargs, password, target_p
@cloud.command(cls=TrackCommand)
-@click.option("--cloud-token", help="The GitHub token used by Recce Cloud.", type=click.STRING, envvar="GITHUB_TOKEN")
+@click.option(
+ "--cloud-token",
+ help="The GitHub token used by Recce Cloud.",
+ type=click.STRING,
+ envvar="GITHUB_TOKEN",
+)
@click.option(
"--branch",
"-b",
@@ -1901,7 +2008,12 @@ def _download_artifacts(branch, cloud_token, console, kwargs, password, target_p
envvar="RECCE_STATE_PASSWORD",
required=True,
)
-@click.option("--force", "-f", help="Bypasses the confirmation prompt. Download the artifacts directly.", is_flag=True)
+@click.option(
+ "--force",
+ "-f",
+ help="Bypasses the confirmation prompt. Download the artifacts directly.",
+ is_flag=True,
+)
@add_options(recce_options)
def download_artifacts(**kwargs):
"""
@@ -1926,7 +2038,12 @@ def download_artifacts(**kwargs):
@cloud.command(cls=TrackCommand)
-@click.option("--cloud-token", help="The GitHub token used by Recce Cloud.", type=click.STRING, envvar="GITHUB_TOKEN")
+@click.option(
+ "--cloud-token",
+ help="The GitHub token used by Recce Cloud.",
+ type=click.STRING,
+ envvar="GITHUB_TOKEN",
+)
@click.option(
"--branch",
"-b",
@@ -1949,7 +2066,12 @@ def download_artifacts(**kwargs):
envvar="RECCE_STATE_PASSWORD",
required=True,
)
-@click.option("--force", "-f", help="Bypasses the confirmation prompt. Download the artifacts directly.", is_flag=True)
+@click.option(
+ "--force",
+ "-f",
+ help="Bypasses the confirmation prompt. Download the artifacts directly.",
+ is_flag=True,
+)
@add_options(recce_options)
def download_base_artifacts(**kwargs):
"""
@@ -1980,7 +2102,12 @@ def download_base_artifacts(**kwargs):
@cloud.command(cls=TrackCommand)
-@click.option("--cloud-token", help="The GitHub token used by Recce Cloud.", type=click.STRING, envvar="GITHUB_TOKEN")
+@click.option(
+ "--cloud-token",
+ help="The GitHub token used by Recce Cloud.",
+ type=click.STRING,
+ envvar="GITHUB_TOKEN",
+)
@click.option(
"--branch",
"-b",
@@ -1988,7 +2115,12 @@ def download_base_artifacts(**kwargs):
type=click.STRING,
envvar="GITHUB_HEAD_REF",
)
-@click.option("--force", "-f", help="Bypasses the confirmation prompt. Delete the artifacts directly.", is_flag=True)
+@click.option(
+ "--force",
+ "-f",
+ help="Bypasses the confirmation prompt. Delete the artifacts directly.",
+ is_flag=True,
+)
@add_options(recce_options)
def delete_artifacts(**kwargs):
"""
@@ -2032,7 +2164,12 @@ def delete_artifacts(**kwargs):
@cloud.command(cls=TrackCommand, name="list-organizations")
-@click.option("--api-token", help="The Recce Cloud API token.", type=click.STRING, envvar="RECCE_API_TOKEN")
+@click.option(
+ "--api-token",
+ help="The Recce Cloud API token.",
+ type=click.STRING,
+ envvar="RECCE_API_TOKEN",
+)
@add_options(recce_options)
def list_organizations(**kwargs):
"""
@@ -2092,7 +2229,12 @@ def list_organizations(**kwargs):
type=click.STRING,
envvar="RECCE_ORGANIZATION_ID",
)
-@click.option("--api-token", help="The Recce Cloud API token.", type=click.STRING, envvar="RECCE_API_TOKEN")
+@click.option(
+ "--api-token",
+ help="The Recce Cloud API token.",
+ type=click.STRING,
+ envvar="RECCE_API_TOKEN",
+)
@add_options(recce_options)
def list_projects(**kwargs):
"""
@@ -2151,7 +2293,11 @@ def list_projects(**kwargs):
table.add_column("Display Name", style="yellow")
for project in projects:
- table.add_row(str(project.get("id", "")), project.get("name", ""), project.get("display_name", ""))
+ table.add_row(
+ str(project.get("id", "")),
+ project.get("name", ""),
+ project.get("display_name", ""),
+ )
console.print(table)
@@ -2178,7 +2324,12 @@ def list_projects(**kwargs):
type=click.STRING,
envvar="RECCE_PROJECT_ID",
)
-@click.option("--api-token", help="The Recce Cloud API token.", type=click.STRING, envvar="RECCE_API_TOKEN")
+@click.option(
+ "--api-token",
+ help="The Recce Cloud API token.",
+ type=click.STRING,
+ envvar="RECCE_API_TOKEN",
+)
@add_options(recce_options)
def list_sessions(**kwargs):
"""
@@ -2270,7 +2421,8 @@ def github(**kwargs):
@github.command(
- cls=TrackCommand, short_help="Download the artifacts from the GitHub repository based on the current Pull Request."
+ cls=TrackCommand,
+ short_help="Download the artifacts from the GitHub repository based on the current Pull Request.",
)
@click.option(
"--github-token",
@@ -2328,7 +2480,10 @@ def share(state_file, **kwargs):
# load local state
state_loader = create_state_loader(
- review_mode=True, cloud_mode=False, state_file=state_file, cloud_options=cloud_options
+ review_mode=True,
+ cloud_mode=False,
+ state_file=state_file,
+ cloud_options=cloud_options,
)
state_loader.load()
@@ -2431,7 +2586,10 @@ def upload_session(**kwargs):
try:
rc = upload_artifacts_to_session(
- target_path, session_id=session_id, token=api_token, debug=kwargs.get("debug", False)
+ target_path,
+ session_id=session_id,
+ token=api_token,
+ debug=kwargs.get("debug", False),
)
console.rule("Uploaded Successfully")
console.print(
@@ -2464,8 +2622,19 @@ def snapshot(**kwargs):
@click.argument("state_file", required=True)
@click.option("--host", default="localhost", show_default=True, help="The host to bind to.")
@click.option("--port", default=8000, show_default=True, help="The port to bind to.", type=int)
-@click.option("--lifetime", default=0, show_default=True, help="The lifetime of the server in seconds.", type=int)
-@click.option("--share-url", help="The share URL triggers this instance.", type=click.STRING, envvar="RECCE_SHARE_URL")
+@click.option(
+ "--lifetime",
+ default=0,
+ show_default=True,
+ help="The lifetime of the server in seconds.",
+ type=int,
+)
+@click.option(
+ "--share-url",
+ help="The share URL triggers this instance.",
+ type=click.STRING,
+ envvar="RECCE_SHARE_URL",
+)
@click.pass_context
def read_only(ctx, state_file=None, **kwargs):
from recce.server import RecceServerMode
@@ -2477,10 +2646,24 @@ def read_only(ctx, state_file=None, **kwargs):
@cli.command(cls=TrackCommand)
@click.argument("state_file", required=False)
-@click.option("--sse", is_flag=True, default=False, help="Start in HTTP/SSE mode instead of stdio mode")
-@click.option("--host", default="localhost", help="Host to bind to in SSE mode (default: localhost)")
+@click.option(
+ "--sse",
+ is_flag=True,
+ default=False,
+ help="Start in HTTP/SSE mode instead of stdio mode",
+)
+@click.option(
+ "--host",
+ default="localhost",
+ help="Host to bind to in SSE mode (default: localhost)",
+)
@click.option("--port", default=8000, type=int, help="Port to bind to in SSE mode (default: 8000)")
-@click.option("--session", "cloud_session", type=click.STRING, help="Recce Cloud session ID for cloud MCP mode")
+@click.option(
+ "--session",
+ "cloud_session",
+ type=click.STRING,
+ help="Recce Cloud session ID for cloud MCP mode",
+)
@add_options(dbt_related_options)
@add_options(sqlmesh_related_options)
@add_options(recce_options)
@@ -2761,5 +2944,199 @@ def clear_cache(cache_db):
pass
+def resolve_target_base_path(
+ project_dir: str | None,
+ target_base_path: str,
+) -> str:
+ """Resolve ``target_base_path`` against ``project_dir`` like every dbt-aware
+ command does.
+
+ An absolute ``target_base_path`` bypasses the join. Relative paths are
+ joined onto ``project_dir`` (or CWD if ``project_dir`` is None).
+
+ Shared by the ``recce check-base`` CLI command and the ``recce mcp-server``
+ startup freshness check so they cannot drift — having two copies of the
+ join logic was the root cause of the round-2 review finding (CLI was
+ fixed, MCP startup was missed).
+ """
+ base = Path(target_base_path)
+ if base.is_absolute():
+ return str(base)
+ return str(Path(project_dir or "./") / base)
+
+
+def check_base_freshness(
+ target_base_path: str = "target-base",
+ freshness_threshold_hours: float = 48.0,
+) -> dict:
+ """
+ Check whether the base artifacts in target_base_path are fresh.
+
+ Returns a dict with keys:
+ status: "fresh" | "stale_time" | "stale_sha" | "missing"
+ recommendation: "reuse" | "docs_generate" | "full_build"
+ message: human-readable explanation
+ artifact_age_hours: float or None
+ base_sha: str or None (DBT_GIT_SHA from manifest metadata)
+ current_sha: str or None (current HEAD SHA)
+ threshold_hours: float
+ """
+ import json
+ import logging
+ import time
+
+ _logger = logging.getLogger("recce")
+
+ manifest_path = Path(target_base_path) / "manifest.json"
+ result: dict = {
+ "status": None,
+ "recommendation": None,
+ "message": None,
+ "artifact_age_hours": None,
+ "base_sha": None,
+ "current_sha": None,
+ "threshold_hours": freshness_threshold_hours,
+ }
+
+ if not manifest_path.exists():
+ result["status"] = "missing"
+ result["recommendation"] = "full_build"
+ result["message"] = (
+ f"Base artifacts not found at '{target_base_path}/manifest.json'. "
+ f"Run: git stash; git checkout ; dbt build --target-path {target_base_path}; "
+ "git checkout ; git stash pop"
+ )
+ return result
+
+ # Compute artifact age from mtime
+ mtime = manifest_path.stat().st_mtime
+ now = time.time()
+ artifact_age_hours = (now - mtime) / 3600.0
+ result["artifact_age_hours"] = artifact_age_hours
+
+ # Time-based freshness check
+ if artifact_age_hours > freshness_threshold_hours:
+ result["status"] = "stale_time"
+ result["recommendation"] = "docs_generate"
+ result["message"] = (
+ f"Base artifacts are stale ({artifact_age_hours:.1f} hours old, "
+ f"threshold: {freshness_threshold_hours}h). "
+ f"Run: dbt docs generate --target-path {target_base_path}"
+ )
+ return result
+
+ # SHA-based freshness check (best-effort: skip if field absent or git unavailable)
+ try:
+ with open(manifest_path) as f:
+ manifest_data = json.load(f)
+ base_sha = manifest_data.get("metadata", {}).get("env", {}).get("DBT_GIT_SHA")
+ result["base_sha"] = base_sha
+
+ if base_sha is not None:
+ from recce.git import current_commit_hash
+
+ current_sha = current_commit_hash()
+ result["current_sha"] = current_sha
+ if current_sha and base_sha != current_sha:
+ result["status"] = "stale_sha"
+ result["recommendation"] = "docs_generate"
+ result["message"] = (
+ f"Base artifacts are stale (generated at {base_sha[:7]}, "
+ f"current HEAD: {current_sha[:7]}). "
+ f"Run: dbt docs generate --target-path {target_base_path}"
+ )
+ return result
+ except Exception as e:
+ # Best-effort: if manifest is unreadable or git is unavailable, skip SHA check.
+ # Log at debug so the reason is recoverable without surfacing in normal output.
+ _logger.debug(f"check_base_freshness: SHA check skipped ({e})")
+
+ result["status"] = "fresh"
+ result["recommendation"] = "reuse"
+ result["message"] = f"Base artifacts are fresh ({artifact_age_hours:.1f} hours old). " "Reuse existing artifacts."
+ return result
+
+
+@cli.command(name="check-base", cls=TrackCommand)
+@click.option(
+ "--project-dir",
+ help="Which directory to look in for the dbt_project.yml file. "
+ "target-base-path is resolved relative to this when not absolute.",
+ type=click.Path(),
+ envvar="DBT_PROJECT_DIR",
+)
+@click.option(
+ "--target-base-path",
+ default="target-base",
+ show_default=True,
+ help="Path to the base artifacts directory (relative to --project-dir if not absolute).",
+ type=click.Path(),
+)
+@click.option(
+ "--format",
+ "output_format",
+ default="json",
+ show_default=True,
+ type=click.Choice(["json", "text"]),
+ help="Output format: json (default) or text for human-readable output.",
+)
+@click.option(
+ "--freshness-threshold-hours",
+ default=48.0,
+ show_default=True,
+ type=float,
+ help="Age threshold in hours after which artifacts are considered stale_time.",
+)
+def check_base(project_dir, target_base_path, output_format, freshness_threshold_hours):
+ """Check freshness of base artifacts for diff operations.
+
+ Exit codes:
+ 0 - fresh
+ 1 - missing (rebuild required: full_build)
+ 2 - stale_time / stale_sha (regenerate when convenient: docs_generate)
+ """
+ import json
+
+ # Honor --project-dir / DBT_PROJECT_DIR like every other dbt-aware command.
+ resolved_target_base = resolve_target_base_path(project_dir, target_base_path)
+
+ result = check_base_freshness(
+ target_base_path=resolved_target_base,
+ freshness_threshold_hours=freshness_threshold_hours,
+ )
+
+ if output_format == "json":
+ click.echo(json.dumps(result, indent=2))
+ else:
+ from rich.console import Console
+
+ console = Console()
+ status = result["status"]
+ age = result.get("artifact_age_hours")
+ msg = result.get("message", "")
+ color_map = {
+ "fresh": "green",
+ "stale_time": "yellow",
+ "stale_sha": "yellow",
+ "missing": "red",
+ }
+ color = color_map.get(status, "white")
+ console.print(f"[{color}]Status: {status}[/{color}]")
+ if age is not None:
+ console.print(f"Age: {age:.1f}h (threshold: {result['threshold_hours']}h)")
+ console.print(f"Recommendation: {result['recommendation']}")
+ if msg:
+ console.print(msg)
+
+ # Distinct exit codes so shell automation can branch without parsing JSON.
+ status = result["status"]
+ if status == "fresh":
+ return
+ if status == "missing":
+ raise SystemExit(1)
+ # stale_time, stale_sha (and any future non-fresh status)
+ raise SystemExit(2)
+
+
if __name__ == "__main__":
cli()
diff --git a/recce/mcp_server.py b/recce/mcp_server.py
index 3034db31b..9b59ad1d3 100644
--- a/recce/mcp_server.py
+++ b/recce/mcp_server.py
@@ -205,7 +205,11 @@ async def _tool_run_check(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
check_id = arguments.get("check_id")
if not check_id:
raise ValueError("check_id is required")
- run = await self._request("POST", f"checks/{quote(str(check_id), safe='')}/run", json={"nowait": False})
+ run = await self._request(
+ "POST",
+ f"checks/{quote(str(check_id), safe='')}/run",
+ json={"nowait": False},
+ )
if self._run_succeeded(run):
await self._auto_approve(check_id)
return run
@@ -230,12 +234,20 @@ async def _tool_create_check(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
# executable run). lineage_diff/schema_diff are recorded server-side via
# POST /checks/{id}/run, mirroring local _create_metadata_run.
if check_id and check_type != "simple":
- run = await self._request("POST", f"checks/{quote(str(check_id), safe='')}/run", json={"nowait": False})
+ run = await self._request(
+ "POST",
+ f"checks/{quote(str(check_id), safe='')}/run",
+ json={"nowait": False},
+ )
run_executed = True
run_error = run.get("error")
if self._run_succeeded(run):
await self._auto_approve(check_id)
- result = {"check_id": str(check_id), "created": True, "run_executed": run_executed}
+ result = {
+ "check_id": str(check_id),
+ "created": True,
+ "run_executed": run_executed,
+ }
if run_error:
result["run_error"] = run_error
return result
@@ -248,7 +260,11 @@ async def _auto_approve(self, check_id) -> None:
post-success side-effect, not part of the run contract.
"""
try:
- await self._request("PATCH", f"checks/{quote(str(check_id), safe='')}", json={"is_checked": True})
+ await self._request(
+ "PATCH",
+ f"checks/{quote(str(check_id), safe='')}",
+ json={"is_checked": True},
+ )
except (RecceCloudException, InstanceSpawningError) as e:
logger.warning(f"[MCP] Auto-approve failed for check {check_id}: {e}")
@@ -292,7 +308,10 @@ async def _tool_lineage_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
if source in id_to_idx and target in id_to_idx:
edge_rows.append((id_to_idx[source], id_to_idx[target]))
edges_df = DataFrame.from_data(columns={"from": "integer", "to": "integer"}, data=edge_rows)
- return {"nodes": nodes_df.model_dump(mode="json"), "edges": edges_df.model_dump(mode="json")}
+ return {
+ "nodes": nodes_df.model_dump(mode="json"),
+ "edges": edges_df.model_dump(mode="json"),
+ }
async def _tool_schema_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
info = await self._request("GET", "info")
@@ -316,7 +335,10 @@ async def _tool_schema_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
async def _tool_impact_analysis(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
info = await self._request("GET", "info")
nodes = info.get("lineage", {}).get("nodes", {})
- select = arguments.get("select", "state:modified.body+ state:modified.macros+ state:modified.contract+")
+ select = arguments.get(
+ "select",
+ "state:modified.body+ state:modified.macros+ state:modified.contract+",
+ )
impacted_node_ids = set((await self._request("POST", "select", json={"select": select})).get("nodes", []))
modified_node_ids = set(
(await self._request("POST", "select", json={"select": "state:modified"})).get("nodes", [])
@@ -682,7 +704,15 @@ async def list_tools() -> List[Tool]:
description="Get server context information including current backend mode "
"('local', 'cloud', or 'none' when unconfigured), adapter type, git branch, "
"supported tasks, and review mode status. Useful for diagnostics and "
- "understanding which diff tools are available.",
+ "understanding which diff tools are available.\n\n"
+ "The 'base_status' field is a single-cased enum (all lowercase) reporting "
+ "whether the local target-base/ artifacts are usable for diff operations:\n"
+ " - 'fresh': artifacts are within the freshness threshold and SHA matches; safe to diff.\n"
+ " - 'stale_time': artifacts older than the freshness threshold; diff results may be outdated.\n"
+ " - 'stale_sha': artifacts were generated against a different git SHA than current HEAD.\n"
+ " - 'missing': no manifest.json under target-base/; diffs will fail until rebuilt.\n"
+ " - 'single_env': server is running in single-env mode; diff is not applicable.\n"
+ " - 'unknown': freshness check did not run (e.g., cloud mode or check skipped).",
inputSchema={
"type": "object",
"properties": {},
@@ -2039,6 +2069,14 @@ async def _tool_get_server_info(self, arguments: Dict[str, Any]) -> Dict[str, An
"single_env": self.single_env,
}
+ # Include base_status so agents can programmatically detect stale state.
+ # All-lowercase enum (matches the recommendation field's casing):
+ # "fresh" | "stale_time" | "stale_sha" | "missing" | "single_env" | "unknown"
+ if self.single_env:
+ result["base_status"] = "single_env"
+ else:
+ result["base_status"] = getattr(self, "_base_status", "unknown")
+
# Add git and pull_request info if state_loader is available
if context.state_loader:
try:
@@ -2122,7 +2160,10 @@ async def _tool_set_backend(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
else:
self.single_env = not base_path.is_dir()
- load_kwargs = {"target_path": target_path, "target_base_path": effective_base}
+ load_kwargs = {
+ "target_path": target_path,
+ "target_base_path": effective_base,
+ }
if project_dir:
load_kwargs["project_dir"] = project_dir
self.context = load_context(**load_kwargs)
@@ -2571,6 +2612,43 @@ async def run_mcp_server(
)
server._local_cache_key = cache_key
+ # Freshness check (M2, AC-3): warn on stale or missing base artifacts at startup.
+ # Lazy import to avoid circular import; best-effort — startup never fails here.
+ try:
+ from recce.cli import check_base_freshness, resolve_target_base_path
+
+ # Honor --project-dir like the CLI does. Without this, MCP
+ # startup looks for ./target-base/manifest.json relative to
+ # CWD, missing artifacts that exist at --project-dir-relative
+ # paths (or worse, picking up a stale manifest from another
+ # project that happens to live in CWD).
+ _tb = resolve_target_base_path(
+ kwargs.get("project_dir"),
+ kwargs.get("target_base_path", "target-base"),
+ )
+ _freshness = check_base_freshness(target_base_path=_tb)
+ server._base_status = _freshness.get("status", "fresh")
+ if server._base_status in ("stale_time", "stale_sha"):
+ _age = _freshness.get("artifact_age_hours") or 0
+ logger.warning(
+ f"Base artifacts are stale ({_age:.1f} hours old). "
+ "Diffs may not reflect the latest base. "
+ f"Run: dbt docs generate --target-path {_tb} "
+ "(or use `recce check-base` for full diagnosis)"
+ )
+ elif server._base_status == "missing":
+ # MISSING is the most actionable failure of the three: diffs
+ # will fail outright, not silently mislead. Surface it loudly
+ # alongside the rebuild path.
+ logger.warning(
+ f"Base artifacts not found at '{_tb}/manifest.json'. "
+ "Diff tools will fail until base artifacts are built. "
+ f"Run: dbt build --target-path {_tb} "
+ "(or use `recce check-base` for full diagnosis)"
+ )
+ except Exception as _e:
+ logger.debug(f"[MCP] Base freshness check skipped: {_e}")
+
# Run in either stdio or SSE mode
if sse:
# SSE mode: lifespan handler in Starlette manages shutdown and state export
diff --git a/tests/test_check_base.py b/tests/test_check_base.py
new file mode 100644
index 000000000..51e4260c0
--- /dev/null
+++ b/tests/test_check_base.py
@@ -0,0 +1,332 @@
+"""
+Tests for check_base_freshness() helper and the `recce check-base` CLI (M2, AC-3).
+
+Coverage:
+ Helper (`check_base_freshness()`):
+ - test_status_fresh — mtime within threshold → fresh
+ - test_status_stale_time — mtime > 48 h → stale_time
+ - test_status_stale_sha — SHA mismatch → stale_sha
+ - test_status_missing — no manifest.json → missing
+ - test_sha_absent_no_raise — R9 best-effort: missing DBT_GIT_SHA field → fresh
+
+ CLI (`recce check-base`):
+ - test_cli_json_schema_fresh — JSON shape includes the documented fields
+ - test_cli_text_format_renders — --format text prints status line
+ - test_cli_exit_code_fresh — fresh → exit 0
+ - test_cli_exit_code_missing — missing → exit 1
+ - test_cli_exit_code_stale_time — stale_time → exit 2
+ - test_cli_project_dir_resolves — --project-dir joins onto target-base-path
+
+ Helper (`resolve_target_base_path`):
+ - test_resolve_relative_joins_with_project_dir
+ - test_resolve_absolute_bypasses_project_dir
+ - test_resolve_no_project_dir_uses_cwd
+ - test_resolve_mcp_startup_finds_artifacts_under_project_dir
+"""
+
+import json
+import os
+import time
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+from click.testing import CliRunner
+
+from recce.cli import check_base, check_base_freshness, resolve_target_base_path
+
+
+@pytest.fixture()
+def fresh_manifest_dir(tmp_path):
+ """Create a target-base/ directory with a freshly-written manifest.json."""
+ target_base = tmp_path / "target-base"
+ target_base.mkdir()
+ manifest = {
+ "metadata": {
+ "generated_at": "2024-01-01T00:00:00.000000Z",
+ "env": {
+ "DBT_GIT_SHA": "abc1234def5678901234567890123456789012ab",
+ },
+ }
+ }
+ (target_base / "manifest.json").write_text(json.dumps(manifest))
+ return target_base
+
+
+@pytest.fixture()
+def old_manifest_dir(tmp_path):
+ """Create a target-base/ directory with a manifest.json whose mtime is 73 h ago."""
+ target_base = tmp_path / "target-base"
+ target_base.mkdir()
+ manifest_path = target_base / "manifest.json"
+ manifest_path.write_text(json.dumps({"metadata": {"env": {}}}))
+ # Back-date mtime by 73 hours
+ old_time = time.time() - (73 * 3600)
+ os.utime(manifest_path, (old_time, old_time))
+ return target_base
+
+
+# ---------------------------------------------------------------------------
+# Helper tests — exercise check_base_freshness() directly
+# ---------------------------------------------------------------------------
+
+
+def test_status_fresh(fresh_manifest_dir):
+ """Manifest mtime within threshold and matching SHA → fresh."""
+ manifest_sha = "abc1234def5678901234567890123456789012ab"
+ with patch("recce.git.current_commit_hash", return_value=manifest_sha):
+ result = check_base_freshness(
+ target_base_path=str(fresh_manifest_dir),
+ freshness_threshold_hours=48.0,
+ )
+ assert result["status"] == "fresh"
+ assert result["recommendation"] == "reuse"
+ assert result["artifact_age_hours"] is not None
+ assert result["artifact_age_hours"] < 48.0
+
+
+def test_status_stale_time(old_manifest_dir):
+ """Manifest mtime > 48 h threshold → stale_time, message contains 'stale'."""
+ result = check_base_freshness(
+ target_base_path=str(old_manifest_dir),
+ freshness_threshold_hours=48.0,
+ )
+ assert result["status"] == "stale_time"
+ assert result["recommendation"] == "docs_generate"
+ assert "stale" in result["message"].lower()
+ assert result["artifact_age_hours"] > 48.0
+
+
+def test_status_stale_sha(fresh_manifest_dir):
+ """SHA in manifest differs from current HEAD → stale_sha, message contains 'stale'."""
+ different_sha = "9999999deadbeef0000000000000000000000000"
+ with patch("recce.git.current_commit_hash", return_value=different_sha):
+ result = check_base_freshness(
+ target_base_path=str(fresh_manifest_dir),
+ freshness_threshold_hours=48.0,
+ )
+ assert result["status"] == "stale_sha"
+ assert result["recommendation"] == "docs_generate"
+ assert "stale" in result["message"].lower()
+
+
+def test_status_missing(tmp_path):
+ """No manifest.json in target_base_path → missing, recommendation full_build."""
+ non_existent = tmp_path / "target-base-empty"
+ result = check_base_freshness(
+ target_base_path=str(non_existent),
+ freshness_threshold_hours=48.0,
+ )
+ assert result["status"] == "missing"
+ assert result["recommendation"] == "full_build"
+ assert result["artifact_age_hours"] is None
+ # Suggested command must reference the user-supplied target_base_path,
+ # not the hardcoded default.
+ assert str(non_existent) in result["message"]
+
+
+def test_sha_absent_no_raise(tmp_path):
+ """R9 best-effort: DBT_GIT_SHA field absent in manifest → fresh, no exception raised."""
+ target_base = tmp_path / "target-base"
+ target_base.mkdir()
+ # Manifest has no DBT_GIT_SHA in metadata.env
+ manifest_no_sha = {
+ "metadata": {
+ "generated_at": "2024-01-01T00:00:00.000000Z",
+ "env": {}, # DBT_GIT_SHA is absent
+ }
+ }
+ (target_base / "manifest.json").write_text(json.dumps(manifest_no_sha))
+
+ # Should not raise, and should fall through to fresh (time check passes)
+ result = check_base_freshness(
+ target_base_path=str(target_base),
+ freshness_threshold_hours=48.0,
+ )
+ # With no DBT_GIT_SHA, SHA check is skipped → fresh (time check passed)
+ assert result["status"] == "fresh"
+ assert result["base_sha"] is None
+
+
+# ---------------------------------------------------------------------------
+# CLI tests — exercise `recce check-base` end-to-end via Click's CliRunner
+# ---------------------------------------------------------------------------
+
+
+def test_cli_json_schema_fresh(fresh_manifest_dir):
+ """--format json (default) emits all documented fields and lowercase status."""
+ manifest_sha = "abc1234def5678901234567890123456789012ab"
+ runner = CliRunner()
+ with patch("recce.git.current_commit_hash", return_value=manifest_sha):
+ result = runner.invoke(
+ check_base,
+ ["--target-base-path", str(fresh_manifest_dir)],
+ )
+ assert result.exit_code == 0, result.output
+ payload = json.loads(result.output)
+ # Documented schema — every key must be present.
+ expected_keys = {
+ "status",
+ "recommendation",
+ "message",
+ "artifact_age_hours",
+ "base_sha",
+ "current_sha",
+ "threshold_hours",
+ }
+ assert expected_keys.issubset(payload.keys())
+ assert payload["status"] == "fresh"
+ assert payload["recommendation"] == "reuse"
+
+
+def test_cli_text_format_renders(fresh_manifest_dir):
+ """--format text emits a human-readable status line."""
+ manifest_sha = "abc1234def5678901234567890123456789012ab"
+ runner = CliRunner()
+ with patch("recce.git.current_commit_hash", return_value=manifest_sha):
+ result = runner.invoke(
+ check_base,
+ [
+ "--target-base-path",
+ str(fresh_manifest_dir),
+ "--format",
+ "text",
+ ],
+ )
+ assert result.exit_code == 0, result.output
+ assert "Status: fresh" in result.output
+ assert "Recommendation: reuse" in result.output
+
+
+def test_cli_exit_code_fresh(fresh_manifest_dir):
+ """fresh → exit 0 (no SystemExit raised)."""
+ manifest_sha = "abc1234def5678901234567890123456789012ab"
+ runner = CliRunner()
+ with patch("recce.git.current_commit_hash", return_value=manifest_sha):
+ result = runner.invoke(
+ check_base,
+ ["--target-base-path", str(fresh_manifest_dir)],
+ )
+ assert result.exit_code == 0
+
+
+def test_cli_exit_code_missing(tmp_path):
+ """missing → exit 1 (rebuild required)."""
+ non_existent = tmp_path / "target-base-empty"
+ runner = CliRunner()
+ result = runner.invoke(
+ check_base,
+ ["--target-base-path", str(non_existent)],
+ )
+ assert result.exit_code == 1
+ payload = json.loads(result.output)
+ assert payload["status"] == "missing"
+ assert payload["recommendation"] == "full_build"
+
+
+def test_cli_exit_code_stale_time(old_manifest_dir):
+ """stale_time → exit 2 (regenerate when convenient)."""
+ runner = CliRunner()
+ result = runner.invoke(
+ check_base,
+ [
+ "--target-base-path",
+ str(old_manifest_dir),
+ "--freshness-threshold-hours",
+ "48",
+ ],
+ )
+ assert result.exit_code == 2
+ payload = json.loads(result.output)
+ assert payload["status"] == "stale_time"
+ assert payload["recommendation"] == "docs_generate"
+
+
+def test_cli_project_dir_resolves(tmp_path):
+ """--project-dir joins onto a relative --target-base-path before resolving."""
+ project_dir = tmp_path / "my_dbt_project"
+ project_dir.mkdir()
+ target_base = project_dir / "target-base"
+ target_base.mkdir()
+ manifest = {
+ "metadata": {
+ "env": {
+ "DBT_GIT_SHA": "abc1234def5678901234567890123456789012ab",
+ },
+ }
+ }
+ (target_base / "manifest.json").write_text(json.dumps(manifest))
+
+ manifest_sha = "abc1234def5678901234567890123456789012ab"
+ runner = CliRunner()
+ # Invoke from tmp_path with --project-dir; relative target-base-path "target-base"
+ # should resolve under project_dir.
+ with patch("recce.git.current_commit_hash", return_value=manifest_sha):
+ result = runner.invoke(
+ check_base,
+ [
+ "--project-dir",
+ str(project_dir),
+ "--target-base-path",
+ "target-base",
+ ],
+ )
+ assert result.exit_code == 0, result.output
+ payload = json.loads(result.output)
+ assert payload["status"] == "fresh"
+
+
+# ---------------------------------------------------------------------------
+# resolve_target_base_path() — shared by CLI and MCP startup so the join logic
+# cannot drift (round-2 review: MCP startup was missing the join after the CLI
+# was fixed).
+# ---------------------------------------------------------------------------
+
+
+def test_resolve_relative_joins_with_project_dir():
+ """A relative target-base-path is joined under project-dir."""
+ resolved = resolve_target_base_path("/foo/bar", "target-base")
+ assert Path(resolved) == Path("/foo/bar") / "target-base"
+
+
+def test_resolve_absolute_bypasses_project_dir():
+ """An absolute target-base-path bypasses the join entirely."""
+ resolved = resolve_target_base_path("/foo/bar", "/tmp/abs/target-base")
+ assert Path(resolved) == Path("/tmp/abs/target-base")
+
+
+def test_resolve_no_project_dir_uses_cwd():
+ """When project_dir is None, resolution is relative to CWD ('./')."""
+ resolved = resolve_target_base_path(None, "target-base")
+ # Don't compare against a CWD-dependent absolute path; just verify the
+ # relative path semantics: joining ./ with target-base.
+ assert Path(resolved) == Path("./") / "target-base"
+
+
+def test_resolve_mcp_startup_finds_artifacts_under_project_dir(tmp_path):
+ """Regression for the round-2 review finding: MCP startup must use the
+ same resolution as the CLI so artifacts under --project-dir are found.
+
+ Mirrors test_cli_project_dir_resolves: builds a fresh manifest at
+ {project_dir}/target-base/manifest.json, then asserts that the resolution
+ helper produces a path whose freshness check returns 'fresh'. Without the
+ helper, MCP startup would look at ./target-base relative to CWD and miss
+ the artifact entirely.
+ """
+ project_dir = tmp_path / "my_dbt_project"
+ project_dir.mkdir()
+ target_base = project_dir / "target-base"
+ target_base.mkdir()
+ manifest_sha = "abc1234def5678901234567890123456789012ab"
+ manifest = {"metadata": {"env": {"DBT_GIT_SHA": manifest_sha}}}
+ (target_base / "manifest.json").write_text(json.dumps(manifest))
+
+ # The MCP startup-equivalent invocation: pass project_dir + relative
+ # target_base_path to the shared helper, then run the freshness check.
+ resolved = resolve_target_base_path(str(project_dir), "target-base")
+ assert Path(resolved) == target_base
+
+ with patch("recce.git.current_commit_hash", return_value=manifest_sha):
+ result = check_base_freshness(target_base_path=resolved)
+
+ assert result["status"] == "fresh", result