diff --git a/recce/cli.py b/recce/cli.py index d72c797b2..b839079a2 100644 --- a/recce/cli.py +++ b/recce/cli.py @@ -134,7 +134,12 @@ def _add_options(func): dbt_related_options = [ - click.option("--target", "-t", help="Which target to load for the given profile.", type=click.STRING), + click.option( + "--target", + "-t", + help="Which target to load for the given profile.", + type=click.STRING, + ), click.option("--profile", help="Which existing profile to load.", type=click.STRING), click.option( "--project-dir", @@ -152,8 +157,18 @@ def _add_options(func): sqlmesh_related_options = [ click.option("--sqlmesh", is_flag=True, help="Use SQLMesh ", hidden=True), - click.option("--sqlmesh-envs", is_flag=False, help="SQLMesh envs to compare. SOURCE:TARGET", hidden=True), - click.option("--sqlmesh-config", is_flag=False, help="SQLMesh config name to use", hidden=True), + click.option( + "--sqlmesh-envs", + is_flag=False, + help="SQLMesh envs to compare. SOURCE:TARGET", + hidden=True, + ), + click.option( + "--sqlmesh-config", + is_flag=False, + help="SQLMesh config name to use", + hidden=True, + ), ] recce_options = [ @@ -165,7 +180,11 @@ def _add_options(func): show_default=True, ), click.option( - "--error-log", help="Path to the error log file.", type=click.Path(), default=RECCE_ERROR_LOG_FILE, hidden=True + "--error-log", + help="Path to the error log file.", + type=click.Path(), + default=RECCE_ERROR_LOG_FILE, + hidden=True, ), click.option("--debug", is_flag=True, help="Enable debug mode.", hidden=True), ] @@ -173,7 +192,10 @@ def _add_options(func): recce_cloud_options = [ click.option("--cloud", is_flag=True, help="Fetch the state file from cloud."), click.option( - "--cloud-token", help="The GitHub token used by Recce Cloud.", type=click.STRING, envvar="GITHUB_TOKEN" + "--cloud-token", + help="The GitHub token used by Recce Cloud.", + type=click.STRING, + envvar="GITHUB_TOKEN", ), click.option( "--state-file-host", @@ -234,7 +256,10 @@ def _add_options(func): "--session-id", help="The session ID triggers this instance.", type=click.STRING, - envvar=["RECCE_SESSION_ID", "RECCE_SNAPSHOT_ID"], # Backward compatibility with RECCE_SNAPSHOT_ID + envvar=[ + "RECCE_SESSION_ID", + "RECCE_SNAPSHOT_ID", + ], # Backward compatibility with RECCE_SNAPSHOT_ID hidden=True, ), ] @@ -271,7 +296,10 @@ def cli(ctx, **kwargs): error_console.print( f"[[yellow]Update Available[/yellow]] A new version of Recce {__latest_version__} is available.", ) - error_console.print("Please update using the command: 'pip install --upgrade recce'.", end="\n\n") + error_console.print( + "Please update using the command: 'pip install --upgrade recce'.", + end="\n\n", + ) @cli.command(cls=TrackCommand) @@ -400,7 +428,10 @@ def init(cache_db, **kwargs): target_base_path.mkdir(parents=True, exist_ok=True) # Download current session artifacts - for artifact_key, filename in [("manifest_url", "manifest.json"), ("catalog_url", "catalog.json")]: + for artifact_key, filename in [ + ("manifest_url", "manifest.json"), + ("catalog_url", "catalog.json"), + ]: url = download_urls.get(artifact_key) if url: try: @@ -423,7 +454,10 @@ def init(cache_db, **kwargs): except RecceCloudException as e: console.print(f" [[yellow]Warning[/yellow]] Failed to get base session URLs: {e}") base_download_urls = {} - for artifact_key, filename in [("manifest_url", "manifest.json"), ("catalog_url", "catalog.json")]: + for artifact_key, filename in [ + ("manifest_url", "manifest.json"), + ("catalog_url", "catalog.json"), + ]: url = base_download_urls.get(artifact_key) if url: try: @@ -776,7 +810,12 @@ def _to_dict(artifact): else artifact ) - for env_name, manifest, catalog, cross_catalog in envs_to_emit: + for ( + env_name, + manifest, + catalog, + cross_catalog, + ) in envs_to_emit: if manifest is None: continue manifest_dict = manifest.to_dict() if hasattr(manifest, "to_dict") else manifest @@ -1087,12 +1126,24 @@ def _split_comma_separated(ctx, param, value): help="Comma-separated list of primary key columns.", callback=_split_comma_separated, ) -@click.option("--keep-shape", is_flag=True, help="Keep unchanged columns. Otherwise, unchanged columns are hidden.") @click.option( - "--keep-equal", is_flag=True, help='Keep values that are equal. Otherwise, equal values are shown as "-".' + "--keep-shape", + is_flag=True, + help="Keep unchanged columns. Otherwise, unchanged columns are hidden.", +) +@click.option( + "--keep-equal", + is_flag=True, + help='Keep values that are equal. Otherwise, equal values are shown as "-".', ) @add_options(dbt_related_options) -def diff(sql, primary_keys: List[str] = None, keep_shape: bool = False, keep_equal: bool = False, **kwargs): +def diff( + sql, + primary_keys: List[str] = None, + keep_shape: bool = False, + keep_equal: bool = False, + **kwargs, +): """ Run queries on base and current environments and diff the results @@ -1114,7 +1165,10 @@ def diff(sql, primary_keys: List[str] = None, keep_shape: bool = False, keep_equ before_aligned, after_aligned = before.align(after) diff = before_aligned.compare( - after_aligned, result_names=("base", "current"), keep_equal=keep_equal, keep_shape=keep_shape + after_aligned, + result_names=("base", "current"), + keep_equal=keep_equal, + keep_shape=keep_shape, ) print(diff.to_string(na_rep="-") if not diff.empty else "no changes") @@ -1123,7 +1177,13 @@ def diff(sql, primary_keys: List[str] = None, keep_shape: bool = False, keep_equ @click.argument("state_file", required=False) @click.option("--host", default="localhost", show_default=True, help="The host to bind to.") @click.option("--port", default=8000, show_default=True, help="The port to bind to.", type=int) -@click.option("--lifetime", default=0, show_default=True, help="The lifetime of the server in seconds.", type=int) +@click.option( + "--lifetime", + default=0, + show_default=True, + help="The lifetime of the server in seconds.", + type=int, +) @click.option( "--idle-timeout", default=0, @@ -1366,10 +1426,15 @@ def server(host, port, lifetime, idle_timeout=0, state_file=None, **kwargs): envvar="GITHUB_HEAD_REF", ) @click.option( - "--git-base-branch", help="The git branch of the base environment.", type=click.STRING, envvar="GITHUB_BASE_REF" + "--git-base-branch", + help="The git branch of the base environment.", + type=click.STRING, + envvar="GITHUB_BASE_REF", ) @click.option( - "--github-pull-request-url", help="The github pull request url to use for the lineage.", type=click.STRING + "--github-pull-request-url", + help="The github pull request url to use for the lineage.", + type=click.STRING, ) @add_options(dbt_related_options) @add_options(sqlmesh_related_options) @@ -1448,7 +1513,6 @@ def run(output, **kwargs): # Verify the output state file path try: if os.path.isdir(output) or output.endswith("/"): - output_dir = Path(output) # Create the directory if not exists output_dir.mkdir(parents=True, exist_ok=True) @@ -1507,7 +1571,10 @@ def summary(state_file, **kwargs): ) state_loader = create_state_loader( - review_mode=True, cloud_mode=cloud_mode, state_file=state_file, cloud_options=cloud_options + review_mode=True, + cloud_mode=cloud_mode, + state_file=state_file, + cloud_options=cloud_options, ) state_loader.load() @@ -1566,7 +1633,12 @@ def cloud(**kwargs): @cloud.command(cls=TrackCommand) -@click.option("--cloud-token", help="The GitHub token used by Recce Cloud.", type=click.STRING, envvar="GITHUB_TOKEN") +@click.option( + "--cloud-token", + help="The GitHub token used by Recce Cloud.", + type=click.STRING, + envvar="GITHUB_TOKEN", +) @click.option( "--state-file-host", help="The host to fetch the state file from.", @@ -1582,7 +1654,12 @@ def cloud(**kwargs): type=click.STRING, envvar="RECCE_STATE_PASSWORD", ) -@click.option("--force", "-f", help="Bypasses the confirmation prompt. Purge the state file directly.", is_flag=True) +@click.option( + "--force", + "-f", + help="Bypasses the confirmation prompt. Purge the state file directly.", + is_flag=True, +) @add_options(recce_options) def purge(**kwargs): """ @@ -1605,7 +1682,10 @@ def purge(**kwargs): try: console.rule("Check Recce State from Cloud") state_loader = create_state_loader( - review_mode=False, cloud_mode=True, state_file=None, cloud_options=cloud_options + review_mode=False, + cloud_mode=True, + state_file=None, + cloud_options=cloud_options, ) state_loader.load() except Exception: @@ -1653,7 +1733,12 @@ def purge(**kwargs): @cloud.command(cls=TrackCommand) @click.argument("state_file", type=click.Path(exists=True)) -@click.option("--cloud-token", help="The GitHub token used by Recce Cloud.", type=click.STRING, envvar="GITHUB_TOKEN") +@click.option( + "--cloud-token", + help="The GitHub token used by Recce Cloud.", + type=click.STRING, + envvar="GITHUB_TOKEN", +) @click.option( "--state-file-host", help="The host to fetch the state file from.", @@ -1689,7 +1774,10 @@ def upload(state_file, **kwargs): # load local state state_loader = create_state_loader( - review_mode=False, cloud_mode=False, state_file=state_file, cloud_options=cloud_options + review_mode=False, + cloud_mode=False, + state_file=state_file, + cloud_options=cloud_options, ) state_loader.load() @@ -1724,7 +1812,12 @@ def upload(state_file, **kwargs): default=DEFAULT_RECCE_STATE_FILE, show_default=True, ) -@click.option("--cloud-token", help="The GitHub token used by Recce Cloud.", type=click.STRING, envvar="GITHUB_TOKEN") +@click.option( + "--cloud-token", + help="The GitHub token used by Recce Cloud.", + type=click.STRING, + envvar="GITHUB_TOKEN", +) @click.option( "--state-file-host", help="The host to fetch the state file from.", @@ -1778,7 +1871,12 @@ def download(**kwargs): @cloud.command(cls=TrackCommand) -@click.option("--cloud-token", help="The GitHub token used by Recce Cloud.", type=click.STRING, envvar="GITHUB_TOKEN") +@click.option( + "--cloud-token", + help="The GitHub token used by Recce Cloud.", + type=click.STRING, + envvar="GITHUB_TOKEN", +) @click.option( "--branch", "-b", @@ -1825,7 +1923,11 @@ def upload_artifacts(**kwargs): try: rc = upload_dbt_artifacts( - target_path, branch=branch, token=cloud_token, password=password, debug=kwargs.get("debug", False) + target_path, + branch=branch, + token=cloud_token, + password=password, + debug=kwargs.get("debug", False), ) console.rule("Uploaded Successfully") console.print( @@ -1878,7 +1980,12 @@ def _download_artifacts(branch, cloud_token, console, kwargs, password, target_p @cloud.command(cls=TrackCommand) -@click.option("--cloud-token", help="The GitHub token used by Recce Cloud.", type=click.STRING, envvar="GITHUB_TOKEN") +@click.option( + "--cloud-token", + help="The GitHub token used by Recce Cloud.", + type=click.STRING, + envvar="GITHUB_TOKEN", +) @click.option( "--branch", "-b", @@ -1901,7 +2008,12 @@ def _download_artifacts(branch, cloud_token, console, kwargs, password, target_p envvar="RECCE_STATE_PASSWORD", required=True, ) -@click.option("--force", "-f", help="Bypasses the confirmation prompt. Download the artifacts directly.", is_flag=True) +@click.option( + "--force", + "-f", + help="Bypasses the confirmation prompt. Download the artifacts directly.", + is_flag=True, +) @add_options(recce_options) def download_artifacts(**kwargs): """ @@ -1926,7 +2038,12 @@ def download_artifacts(**kwargs): @cloud.command(cls=TrackCommand) -@click.option("--cloud-token", help="The GitHub token used by Recce Cloud.", type=click.STRING, envvar="GITHUB_TOKEN") +@click.option( + "--cloud-token", + help="The GitHub token used by Recce Cloud.", + type=click.STRING, + envvar="GITHUB_TOKEN", +) @click.option( "--branch", "-b", @@ -1949,7 +2066,12 @@ def download_artifacts(**kwargs): envvar="RECCE_STATE_PASSWORD", required=True, ) -@click.option("--force", "-f", help="Bypasses the confirmation prompt. Download the artifacts directly.", is_flag=True) +@click.option( + "--force", + "-f", + help="Bypasses the confirmation prompt. Download the artifacts directly.", + is_flag=True, +) @add_options(recce_options) def download_base_artifacts(**kwargs): """ @@ -1980,7 +2102,12 @@ def download_base_artifacts(**kwargs): @cloud.command(cls=TrackCommand) -@click.option("--cloud-token", help="The GitHub token used by Recce Cloud.", type=click.STRING, envvar="GITHUB_TOKEN") +@click.option( + "--cloud-token", + help="The GitHub token used by Recce Cloud.", + type=click.STRING, + envvar="GITHUB_TOKEN", +) @click.option( "--branch", "-b", @@ -1988,7 +2115,12 @@ def download_base_artifacts(**kwargs): type=click.STRING, envvar="GITHUB_HEAD_REF", ) -@click.option("--force", "-f", help="Bypasses the confirmation prompt. Delete the artifacts directly.", is_flag=True) +@click.option( + "--force", + "-f", + help="Bypasses the confirmation prompt. Delete the artifacts directly.", + is_flag=True, +) @add_options(recce_options) def delete_artifacts(**kwargs): """ @@ -2032,7 +2164,12 @@ def delete_artifacts(**kwargs): @cloud.command(cls=TrackCommand, name="list-organizations") -@click.option("--api-token", help="The Recce Cloud API token.", type=click.STRING, envvar="RECCE_API_TOKEN") +@click.option( + "--api-token", + help="The Recce Cloud API token.", + type=click.STRING, + envvar="RECCE_API_TOKEN", +) @add_options(recce_options) def list_organizations(**kwargs): """ @@ -2092,7 +2229,12 @@ def list_organizations(**kwargs): type=click.STRING, envvar="RECCE_ORGANIZATION_ID", ) -@click.option("--api-token", help="The Recce Cloud API token.", type=click.STRING, envvar="RECCE_API_TOKEN") +@click.option( + "--api-token", + help="The Recce Cloud API token.", + type=click.STRING, + envvar="RECCE_API_TOKEN", +) @add_options(recce_options) def list_projects(**kwargs): """ @@ -2151,7 +2293,11 @@ def list_projects(**kwargs): table.add_column("Display Name", style="yellow") for project in projects: - table.add_row(str(project.get("id", "")), project.get("name", ""), project.get("display_name", "")) + table.add_row( + str(project.get("id", "")), + project.get("name", ""), + project.get("display_name", ""), + ) console.print(table) @@ -2178,7 +2324,12 @@ def list_projects(**kwargs): type=click.STRING, envvar="RECCE_PROJECT_ID", ) -@click.option("--api-token", help="The Recce Cloud API token.", type=click.STRING, envvar="RECCE_API_TOKEN") +@click.option( + "--api-token", + help="The Recce Cloud API token.", + type=click.STRING, + envvar="RECCE_API_TOKEN", +) @add_options(recce_options) def list_sessions(**kwargs): """ @@ -2270,7 +2421,8 @@ def github(**kwargs): @github.command( - cls=TrackCommand, short_help="Download the artifacts from the GitHub repository based on the current Pull Request." + cls=TrackCommand, + short_help="Download the artifacts from the GitHub repository based on the current Pull Request.", ) @click.option( "--github-token", @@ -2328,7 +2480,10 @@ def share(state_file, **kwargs): # load local state state_loader = create_state_loader( - review_mode=True, cloud_mode=False, state_file=state_file, cloud_options=cloud_options + review_mode=True, + cloud_mode=False, + state_file=state_file, + cloud_options=cloud_options, ) state_loader.load() @@ -2431,7 +2586,10 @@ def upload_session(**kwargs): try: rc = upload_artifacts_to_session( - target_path, session_id=session_id, token=api_token, debug=kwargs.get("debug", False) + target_path, + session_id=session_id, + token=api_token, + debug=kwargs.get("debug", False), ) console.rule("Uploaded Successfully") console.print( @@ -2464,8 +2622,19 @@ def snapshot(**kwargs): @click.argument("state_file", required=True) @click.option("--host", default="localhost", show_default=True, help="The host to bind to.") @click.option("--port", default=8000, show_default=True, help="The port to bind to.", type=int) -@click.option("--lifetime", default=0, show_default=True, help="The lifetime of the server in seconds.", type=int) -@click.option("--share-url", help="The share URL triggers this instance.", type=click.STRING, envvar="RECCE_SHARE_URL") +@click.option( + "--lifetime", + default=0, + show_default=True, + help="The lifetime of the server in seconds.", + type=int, +) +@click.option( + "--share-url", + help="The share URL triggers this instance.", + type=click.STRING, + envvar="RECCE_SHARE_URL", +) @click.pass_context def read_only(ctx, state_file=None, **kwargs): from recce.server import RecceServerMode @@ -2477,10 +2646,24 @@ def read_only(ctx, state_file=None, **kwargs): @cli.command(cls=TrackCommand) @click.argument("state_file", required=False) -@click.option("--sse", is_flag=True, default=False, help="Start in HTTP/SSE mode instead of stdio mode") -@click.option("--host", default="localhost", help="Host to bind to in SSE mode (default: localhost)") +@click.option( + "--sse", + is_flag=True, + default=False, + help="Start in HTTP/SSE mode instead of stdio mode", +) +@click.option( + "--host", + default="localhost", + help="Host to bind to in SSE mode (default: localhost)", +) @click.option("--port", default=8000, type=int, help="Port to bind to in SSE mode (default: 8000)") -@click.option("--session", "cloud_session", type=click.STRING, help="Recce Cloud session ID for cloud MCP mode") +@click.option( + "--session", + "cloud_session", + type=click.STRING, + help="Recce Cloud session ID for cloud MCP mode", +) @add_options(dbt_related_options) @add_options(sqlmesh_related_options) @add_options(recce_options) @@ -2761,5 +2944,199 @@ def clear_cache(cache_db): pass +def resolve_target_base_path( + project_dir: str | None, + target_base_path: str, +) -> str: + """Resolve ``target_base_path`` against ``project_dir`` like every dbt-aware + command does. + + An absolute ``target_base_path`` bypasses the join. Relative paths are + joined onto ``project_dir`` (or CWD if ``project_dir`` is None). + + Shared by the ``recce check-base`` CLI command and the ``recce mcp-server`` + startup freshness check so they cannot drift — having two copies of the + join logic was the root cause of the round-2 review finding (CLI was + fixed, MCP startup was missed). + """ + base = Path(target_base_path) + if base.is_absolute(): + return str(base) + return str(Path(project_dir or "./") / base) + + +def check_base_freshness( + target_base_path: str = "target-base", + freshness_threshold_hours: float = 48.0, +) -> dict: + """ + Check whether the base artifacts in target_base_path are fresh. + + Returns a dict with keys: + status: "fresh" | "stale_time" | "stale_sha" | "missing" + recommendation: "reuse" | "docs_generate" | "full_build" + message: human-readable explanation + artifact_age_hours: float or None + base_sha: str or None (DBT_GIT_SHA from manifest metadata) + current_sha: str or None (current HEAD SHA) + threshold_hours: float + """ + import json + import logging + import time + + _logger = logging.getLogger("recce") + + manifest_path = Path(target_base_path) / "manifest.json" + result: dict = { + "status": None, + "recommendation": None, + "message": None, + "artifact_age_hours": None, + "base_sha": None, + "current_sha": None, + "threshold_hours": freshness_threshold_hours, + } + + if not manifest_path.exists(): + result["status"] = "missing" + result["recommendation"] = "full_build" + result["message"] = ( + f"Base artifacts not found at '{target_base_path}/manifest.json'. " + f"Run: git stash; git checkout ; dbt build --target-path {target_base_path}; " + "git checkout ; git stash pop" + ) + return result + + # Compute artifact age from mtime + mtime = manifest_path.stat().st_mtime + now = time.time() + artifact_age_hours = (now - mtime) / 3600.0 + result["artifact_age_hours"] = artifact_age_hours + + # Time-based freshness check + if artifact_age_hours > freshness_threshold_hours: + result["status"] = "stale_time" + result["recommendation"] = "docs_generate" + result["message"] = ( + f"Base artifacts are stale ({artifact_age_hours:.1f} hours old, " + f"threshold: {freshness_threshold_hours}h). " + f"Run: dbt docs generate --target-path {target_base_path}" + ) + return result + + # SHA-based freshness check (best-effort: skip if field absent or git unavailable) + try: + with open(manifest_path) as f: + manifest_data = json.load(f) + base_sha = manifest_data.get("metadata", {}).get("env", {}).get("DBT_GIT_SHA") + result["base_sha"] = base_sha + + if base_sha is not None: + from recce.git import current_commit_hash + + current_sha = current_commit_hash() + result["current_sha"] = current_sha + if current_sha and base_sha != current_sha: + result["status"] = "stale_sha" + result["recommendation"] = "docs_generate" + result["message"] = ( + f"Base artifacts are stale (generated at {base_sha[:7]}, " + f"current HEAD: {current_sha[:7]}). " + f"Run: dbt docs generate --target-path {target_base_path}" + ) + return result + except Exception as e: + # Best-effort: if manifest is unreadable or git is unavailable, skip SHA check. + # Log at debug so the reason is recoverable without surfacing in normal output. + _logger.debug(f"check_base_freshness: SHA check skipped ({e})") + + result["status"] = "fresh" + result["recommendation"] = "reuse" + result["message"] = f"Base artifacts are fresh ({artifact_age_hours:.1f} hours old). " "Reuse existing artifacts." + return result + + +@cli.command(name="check-base", cls=TrackCommand) +@click.option( + "--project-dir", + help="Which directory to look in for the dbt_project.yml file. " + "target-base-path is resolved relative to this when not absolute.", + type=click.Path(), + envvar="DBT_PROJECT_DIR", +) +@click.option( + "--target-base-path", + default="target-base", + show_default=True, + help="Path to the base artifacts directory (relative to --project-dir if not absolute).", + type=click.Path(), +) +@click.option( + "--format", + "output_format", + default="json", + show_default=True, + type=click.Choice(["json", "text"]), + help="Output format: json (default) or text for human-readable output.", +) +@click.option( + "--freshness-threshold-hours", + default=48.0, + show_default=True, + type=float, + help="Age threshold in hours after which artifacts are considered stale_time.", +) +def check_base(project_dir, target_base_path, output_format, freshness_threshold_hours): + """Check freshness of base artifacts for diff operations. + + Exit codes: + 0 - fresh + 1 - missing (rebuild required: full_build) + 2 - stale_time / stale_sha (regenerate when convenient: docs_generate) + """ + import json + + # Honor --project-dir / DBT_PROJECT_DIR like every other dbt-aware command. + resolved_target_base = resolve_target_base_path(project_dir, target_base_path) + + result = check_base_freshness( + target_base_path=resolved_target_base, + freshness_threshold_hours=freshness_threshold_hours, + ) + + if output_format == "json": + click.echo(json.dumps(result, indent=2)) + else: + from rich.console import Console + + console = Console() + status = result["status"] + age = result.get("artifact_age_hours") + msg = result.get("message", "") + color_map = { + "fresh": "green", + "stale_time": "yellow", + "stale_sha": "yellow", + "missing": "red", + } + color = color_map.get(status, "white") + console.print(f"[{color}]Status: {status}[/{color}]") + if age is not None: + console.print(f"Age: {age:.1f}h (threshold: {result['threshold_hours']}h)") + console.print(f"Recommendation: {result['recommendation']}") + if msg: + console.print(msg) + + # Distinct exit codes so shell automation can branch without parsing JSON. + status = result["status"] + if status == "fresh": + return + if status == "missing": + raise SystemExit(1) + # stale_time, stale_sha (and any future non-fresh status) + raise SystemExit(2) + + if __name__ == "__main__": cli() diff --git a/recce/mcp_server.py b/recce/mcp_server.py index 3034db31b..9b59ad1d3 100644 --- a/recce/mcp_server.py +++ b/recce/mcp_server.py @@ -205,7 +205,11 @@ async def _tool_run_check(self, arguments: Dict[str, Any]) -> Dict[str, Any]: check_id = arguments.get("check_id") if not check_id: raise ValueError("check_id is required") - run = await self._request("POST", f"checks/{quote(str(check_id), safe='')}/run", json={"nowait": False}) + run = await self._request( + "POST", + f"checks/{quote(str(check_id), safe='')}/run", + json={"nowait": False}, + ) if self._run_succeeded(run): await self._auto_approve(check_id) return run @@ -230,12 +234,20 @@ async def _tool_create_check(self, arguments: Dict[str, Any]) -> Dict[str, Any]: # executable run). lineage_diff/schema_diff are recorded server-side via # POST /checks/{id}/run, mirroring local _create_metadata_run. if check_id and check_type != "simple": - run = await self._request("POST", f"checks/{quote(str(check_id), safe='')}/run", json={"nowait": False}) + run = await self._request( + "POST", + f"checks/{quote(str(check_id), safe='')}/run", + json={"nowait": False}, + ) run_executed = True run_error = run.get("error") if self._run_succeeded(run): await self._auto_approve(check_id) - result = {"check_id": str(check_id), "created": True, "run_executed": run_executed} + result = { + "check_id": str(check_id), + "created": True, + "run_executed": run_executed, + } if run_error: result["run_error"] = run_error return result @@ -248,7 +260,11 @@ async def _auto_approve(self, check_id) -> None: post-success side-effect, not part of the run contract. """ try: - await self._request("PATCH", f"checks/{quote(str(check_id), safe='')}", json={"is_checked": True}) + await self._request( + "PATCH", + f"checks/{quote(str(check_id), safe='')}", + json={"is_checked": True}, + ) except (RecceCloudException, InstanceSpawningError) as e: logger.warning(f"[MCP] Auto-approve failed for check {check_id}: {e}") @@ -292,7 +308,10 @@ async def _tool_lineage_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]: if source in id_to_idx and target in id_to_idx: edge_rows.append((id_to_idx[source], id_to_idx[target])) edges_df = DataFrame.from_data(columns={"from": "integer", "to": "integer"}, data=edge_rows) - return {"nodes": nodes_df.model_dump(mode="json"), "edges": edges_df.model_dump(mode="json")} + return { + "nodes": nodes_df.model_dump(mode="json"), + "edges": edges_df.model_dump(mode="json"), + } async def _tool_schema_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]: info = await self._request("GET", "info") @@ -316,7 +335,10 @@ async def _tool_schema_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]: async def _tool_impact_analysis(self, arguments: Dict[str, Any]) -> Dict[str, Any]: info = await self._request("GET", "info") nodes = info.get("lineage", {}).get("nodes", {}) - select = arguments.get("select", "state:modified.body+ state:modified.macros+ state:modified.contract+") + select = arguments.get( + "select", + "state:modified.body+ state:modified.macros+ state:modified.contract+", + ) impacted_node_ids = set((await self._request("POST", "select", json={"select": select})).get("nodes", [])) modified_node_ids = set( (await self._request("POST", "select", json={"select": "state:modified"})).get("nodes", []) @@ -682,7 +704,15 @@ async def list_tools() -> List[Tool]: description="Get server context information including current backend mode " "('local', 'cloud', or 'none' when unconfigured), adapter type, git branch, " "supported tasks, and review mode status. Useful for diagnostics and " - "understanding which diff tools are available.", + "understanding which diff tools are available.\n\n" + "The 'base_status' field is a single-cased enum (all lowercase) reporting " + "whether the local target-base/ artifacts are usable for diff operations:\n" + " - 'fresh': artifacts are within the freshness threshold and SHA matches; safe to diff.\n" + " - 'stale_time': artifacts older than the freshness threshold; diff results may be outdated.\n" + " - 'stale_sha': artifacts were generated against a different git SHA than current HEAD.\n" + " - 'missing': no manifest.json under target-base/; diffs will fail until rebuilt.\n" + " - 'single_env': server is running in single-env mode; diff is not applicable.\n" + " - 'unknown': freshness check did not run (e.g., cloud mode or check skipped).", inputSchema={ "type": "object", "properties": {}, @@ -2039,6 +2069,14 @@ async def _tool_get_server_info(self, arguments: Dict[str, Any]) -> Dict[str, An "single_env": self.single_env, } + # Include base_status so agents can programmatically detect stale state. + # All-lowercase enum (matches the recommendation field's casing): + # "fresh" | "stale_time" | "stale_sha" | "missing" | "single_env" | "unknown" + if self.single_env: + result["base_status"] = "single_env" + else: + result["base_status"] = getattr(self, "_base_status", "unknown") + # Add git and pull_request info if state_loader is available if context.state_loader: try: @@ -2122,7 +2160,10 @@ async def _tool_set_backend(self, arguments: Dict[str, Any]) -> Dict[str, Any]: else: self.single_env = not base_path.is_dir() - load_kwargs = {"target_path": target_path, "target_base_path": effective_base} + load_kwargs = { + "target_path": target_path, + "target_base_path": effective_base, + } if project_dir: load_kwargs["project_dir"] = project_dir self.context = load_context(**load_kwargs) @@ -2571,6 +2612,43 @@ async def run_mcp_server( ) server._local_cache_key = cache_key + # Freshness check (M2, AC-3): warn on stale or missing base artifacts at startup. + # Lazy import to avoid circular import; best-effort — startup never fails here. + try: + from recce.cli import check_base_freshness, resolve_target_base_path + + # Honor --project-dir like the CLI does. Without this, MCP + # startup looks for ./target-base/manifest.json relative to + # CWD, missing artifacts that exist at --project-dir-relative + # paths (or worse, picking up a stale manifest from another + # project that happens to live in CWD). + _tb = resolve_target_base_path( + kwargs.get("project_dir"), + kwargs.get("target_base_path", "target-base"), + ) + _freshness = check_base_freshness(target_base_path=_tb) + server._base_status = _freshness.get("status", "fresh") + if server._base_status in ("stale_time", "stale_sha"): + _age = _freshness.get("artifact_age_hours") or 0 + logger.warning( + f"Base artifacts are stale ({_age:.1f} hours old). " + "Diffs may not reflect the latest base. " + f"Run: dbt docs generate --target-path {_tb} " + "(or use `recce check-base` for full diagnosis)" + ) + elif server._base_status == "missing": + # MISSING is the most actionable failure of the three: diffs + # will fail outright, not silently mislead. Surface it loudly + # alongside the rebuild path. + logger.warning( + f"Base artifacts not found at '{_tb}/manifest.json'. " + "Diff tools will fail until base artifacts are built. " + f"Run: dbt build --target-path {_tb} " + "(or use `recce check-base` for full diagnosis)" + ) + except Exception as _e: + logger.debug(f"[MCP] Base freshness check skipped: {_e}") + # Run in either stdio or SSE mode if sse: # SSE mode: lifespan handler in Starlette manages shutdown and state export diff --git a/tests/test_check_base.py b/tests/test_check_base.py new file mode 100644 index 000000000..51e4260c0 --- /dev/null +++ b/tests/test_check_base.py @@ -0,0 +1,332 @@ +""" +Tests for check_base_freshness() helper and the `recce check-base` CLI (M2, AC-3). + +Coverage: + Helper (`check_base_freshness()`): + - test_status_fresh — mtime within threshold → fresh + - test_status_stale_time — mtime > 48 h → stale_time + - test_status_stale_sha — SHA mismatch → stale_sha + - test_status_missing — no manifest.json → missing + - test_sha_absent_no_raise — R9 best-effort: missing DBT_GIT_SHA field → fresh + + CLI (`recce check-base`): + - test_cli_json_schema_fresh — JSON shape includes the documented fields + - test_cli_text_format_renders — --format text prints status line + - test_cli_exit_code_fresh — fresh → exit 0 + - test_cli_exit_code_missing — missing → exit 1 + - test_cli_exit_code_stale_time — stale_time → exit 2 + - test_cli_project_dir_resolves — --project-dir joins onto target-base-path + + Helper (`resolve_target_base_path`): + - test_resolve_relative_joins_with_project_dir + - test_resolve_absolute_bypasses_project_dir + - test_resolve_no_project_dir_uses_cwd + - test_resolve_mcp_startup_finds_artifacts_under_project_dir +""" + +import json +import os +import time +from pathlib import Path +from unittest.mock import patch + +import pytest +from click.testing import CliRunner + +from recce.cli import check_base, check_base_freshness, resolve_target_base_path + + +@pytest.fixture() +def fresh_manifest_dir(tmp_path): + """Create a target-base/ directory with a freshly-written manifest.json.""" + target_base = tmp_path / "target-base" + target_base.mkdir() + manifest = { + "metadata": { + "generated_at": "2024-01-01T00:00:00.000000Z", + "env": { + "DBT_GIT_SHA": "abc1234def5678901234567890123456789012ab", + }, + } + } + (target_base / "manifest.json").write_text(json.dumps(manifest)) + return target_base + + +@pytest.fixture() +def old_manifest_dir(tmp_path): + """Create a target-base/ directory with a manifest.json whose mtime is 73 h ago.""" + target_base = tmp_path / "target-base" + target_base.mkdir() + manifest_path = target_base / "manifest.json" + manifest_path.write_text(json.dumps({"metadata": {"env": {}}})) + # Back-date mtime by 73 hours + old_time = time.time() - (73 * 3600) + os.utime(manifest_path, (old_time, old_time)) + return target_base + + +# --------------------------------------------------------------------------- +# Helper tests — exercise check_base_freshness() directly +# --------------------------------------------------------------------------- + + +def test_status_fresh(fresh_manifest_dir): + """Manifest mtime within threshold and matching SHA → fresh.""" + manifest_sha = "abc1234def5678901234567890123456789012ab" + with patch("recce.git.current_commit_hash", return_value=manifest_sha): + result = check_base_freshness( + target_base_path=str(fresh_manifest_dir), + freshness_threshold_hours=48.0, + ) + assert result["status"] == "fresh" + assert result["recommendation"] == "reuse" + assert result["artifact_age_hours"] is not None + assert result["artifact_age_hours"] < 48.0 + + +def test_status_stale_time(old_manifest_dir): + """Manifest mtime > 48 h threshold → stale_time, message contains 'stale'.""" + result = check_base_freshness( + target_base_path=str(old_manifest_dir), + freshness_threshold_hours=48.0, + ) + assert result["status"] == "stale_time" + assert result["recommendation"] == "docs_generate" + assert "stale" in result["message"].lower() + assert result["artifact_age_hours"] > 48.0 + + +def test_status_stale_sha(fresh_manifest_dir): + """SHA in manifest differs from current HEAD → stale_sha, message contains 'stale'.""" + different_sha = "9999999deadbeef0000000000000000000000000" + with patch("recce.git.current_commit_hash", return_value=different_sha): + result = check_base_freshness( + target_base_path=str(fresh_manifest_dir), + freshness_threshold_hours=48.0, + ) + assert result["status"] == "stale_sha" + assert result["recommendation"] == "docs_generate" + assert "stale" in result["message"].lower() + + +def test_status_missing(tmp_path): + """No manifest.json in target_base_path → missing, recommendation full_build.""" + non_existent = tmp_path / "target-base-empty" + result = check_base_freshness( + target_base_path=str(non_existent), + freshness_threshold_hours=48.0, + ) + assert result["status"] == "missing" + assert result["recommendation"] == "full_build" + assert result["artifact_age_hours"] is None + # Suggested command must reference the user-supplied target_base_path, + # not the hardcoded default. + assert str(non_existent) in result["message"] + + +def test_sha_absent_no_raise(tmp_path): + """R9 best-effort: DBT_GIT_SHA field absent in manifest → fresh, no exception raised.""" + target_base = tmp_path / "target-base" + target_base.mkdir() + # Manifest has no DBT_GIT_SHA in metadata.env + manifest_no_sha = { + "metadata": { + "generated_at": "2024-01-01T00:00:00.000000Z", + "env": {}, # DBT_GIT_SHA is absent + } + } + (target_base / "manifest.json").write_text(json.dumps(manifest_no_sha)) + + # Should not raise, and should fall through to fresh (time check passes) + result = check_base_freshness( + target_base_path=str(target_base), + freshness_threshold_hours=48.0, + ) + # With no DBT_GIT_SHA, SHA check is skipped → fresh (time check passed) + assert result["status"] == "fresh" + assert result["base_sha"] is None + + +# --------------------------------------------------------------------------- +# CLI tests — exercise `recce check-base` end-to-end via Click's CliRunner +# --------------------------------------------------------------------------- + + +def test_cli_json_schema_fresh(fresh_manifest_dir): + """--format json (default) emits all documented fields and lowercase status.""" + manifest_sha = "abc1234def5678901234567890123456789012ab" + runner = CliRunner() + with patch("recce.git.current_commit_hash", return_value=manifest_sha): + result = runner.invoke( + check_base, + ["--target-base-path", str(fresh_manifest_dir)], + ) + assert result.exit_code == 0, result.output + payload = json.loads(result.output) + # Documented schema — every key must be present. + expected_keys = { + "status", + "recommendation", + "message", + "artifact_age_hours", + "base_sha", + "current_sha", + "threshold_hours", + } + assert expected_keys.issubset(payload.keys()) + assert payload["status"] == "fresh" + assert payload["recommendation"] == "reuse" + + +def test_cli_text_format_renders(fresh_manifest_dir): + """--format text emits a human-readable status line.""" + manifest_sha = "abc1234def5678901234567890123456789012ab" + runner = CliRunner() + with patch("recce.git.current_commit_hash", return_value=manifest_sha): + result = runner.invoke( + check_base, + [ + "--target-base-path", + str(fresh_manifest_dir), + "--format", + "text", + ], + ) + assert result.exit_code == 0, result.output + assert "Status: fresh" in result.output + assert "Recommendation: reuse" in result.output + + +def test_cli_exit_code_fresh(fresh_manifest_dir): + """fresh → exit 0 (no SystemExit raised).""" + manifest_sha = "abc1234def5678901234567890123456789012ab" + runner = CliRunner() + with patch("recce.git.current_commit_hash", return_value=manifest_sha): + result = runner.invoke( + check_base, + ["--target-base-path", str(fresh_manifest_dir)], + ) + assert result.exit_code == 0 + + +def test_cli_exit_code_missing(tmp_path): + """missing → exit 1 (rebuild required).""" + non_existent = tmp_path / "target-base-empty" + runner = CliRunner() + result = runner.invoke( + check_base, + ["--target-base-path", str(non_existent)], + ) + assert result.exit_code == 1 + payload = json.loads(result.output) + assert payload["status"] == "missing" + assert payload["recommendation"] == "full_build" + + +def test_cli_exit_code_stale_time(old_manifest_dir): + """stale_time → exit 2 (regenerate when convenient).""" + runner = CliRunner() + result = runner.invoke( + check_base, + [ + "--target-base-path", + str(old_manifest_dir), + "--freshness-threshold-hours", + "48", + ], + ) + assert result.exit_code == 2 + payload = json.loads(result.output) + assert payload["status"] == "stale_time" + assert payload["recommendation"] == "docs_generate" + + +def test_cli_project_dir_resolves(tmp_path): + """--project-dir joins onto a relative --target-base-path before resolving.""" + project_dir = tmp_path / "my_dbt_project" + project_dir.mkdir() + target_base = project_dir / "target-base" + target_base.mkdir() + manifest = { + "metadata": { + "env": { + "DBT_GIT_SHA": "abc1234def5678901234567890123456789012ab", + }, + } + } + (target_base / "manifest.json").write_text(json.dumps(manifest)) + + manifest_sha = "abc1234def5678901234567890123456789012ab" + runner = CliRunner() + # Invoke from tmp_path with --project-dir; relative target-base-path "target-base" + # should resolve under project_dir. + with patch("recce.git.current_commit_hash", return_value=manifest_sha): + result = runner.invoke( + check_base, + [ + "--project-dir", + str(project_dir), + "--target-base-path", + "target-base", + ], + ) + assert result.exit_code == 0, result.output + payload = json.loads(result.output) + assert payload["status"] == "fresh" + + +# --------------------------------------------------------------------------- +# resolve_target_base_path() — shared by CLI and MCP startup so the join logic +# cannot drift (round-2 review: MCP startup was missing the join after the CLI +# was fixed). +# --------------------------------------------------------------------------- + + +def test_resolve_relative_joins_with_project_dir(): + """A relative target-base-path is joined under project-dir.""" + resolved = resolve_target_base_path("/foo/bar", "target-base") + assert Path(resolved) == Path("/foo/bar") / "target-base" + + +def test_resolve_absolute_bypasses_project_dir(): + """An absolute target-base-path bypasses the join entirely.""" + resolved = resolve_target_base_path("/foo/bar", "/tmp/abs/target-base") + assert Path(resolved) == Path("/tmp/abs/target-base") + + +def test_resolve_no_project_dir_uses_cwd(): + """When project_dir is None, resolution is relative to CWD ('./').""" + resolved = resolve_target_base_path(None, "target-base") + # Don't compare against a CWD-dependent absolute path; just verify the + # relative path semantics: joining ./ with target-base. + assert Path(resolved) == Path("./") / "target-base" + + +def test_resolve_mcp_startup_finds_artifacts_under_project_dir(tmp_path): + """Regression for the round-2 review finding: MCP startup must use the + same resolution as the CLI so artifacts under --project-dir are found. + + Mirrors test_cli_project_dir_resolves: builds a fresh manifest at + {project_dir}/target-base/manifest.json, then asserts that the resolution + helper produces a path whose freshness check returns 'fresh'. Without the + helper, MCP startup would look at ./target-base relative to CWD and miss + the artifact entirely. + """ + project_dir = tmp_path / "my_dbt_project" + project_dir.mkdir() + target_base = project_dir / "target-base" + target_base.mkdir() + manifest_sha = "abc1234def5678901234567890123456789012ab" + manifest = {"metadata": {"env": {"DBT_GIT_SHA": manifest_sha}}} + (target_base / "manifest.json").write_text(json.dumps(manifest)) + + # The MCP startup-equivalent invocation: pass project_dir + relative + # target_base_path to the shared helper, then run the freshness check. + resolved = resolve_target_base_path(str(project_dir), "target-base") + assert Path(resolved) == target_base + + with patch("recce.git.current_commit_hash", return_value=manifest_sha): + result = check_base_freshness(target_base_path=resolved) + + assert result["status"] == "fresh", result