Add import_mapping tool (#26)

dbast · web-flow · commit 59ce00732e1c · 2025-09-06T09:29:36.000+02:00
diff --git a/README.md b/README.md
@@ -16,6 +16,7 @@ Currently available:
 - Version metadata (MCP tool/library versions) via the `info` tool
 - Package info tarball data via the `package_insights` tool
 - Package search via the `package_search` tool
+- Heuristic import-to-package mapping via the `import_mapping` tool
 - CLI help (for conda) via the `cli_help` tool
 
 Planned:
diff --git a/conda_meta_mcp/tools/__init__.py b/conda_meta_mcp/tools/__init__.py
@@ -1,8 +1,15 @@
 from .cli_help import register_cli_help
+from .import_mapping import register_import_mapping
 from .info import register_info
 from .pkg_insights import register_package_insights
 from .pkg_search import register_package_search
 
-TOOLS = [register_cli_help, register_info, register_package_insights, register_package_search]
+# Registration callables, one per tool module imported above.
+# NOTE(review): presumably the server iterates TOOLS and calls each entry with
+# the FastMCP instance -- confirm against the caller.
+TOOLS = [
+    register_cli_help,
+    register_info,
+    register_package_insights,
+    register_package_search,
+    register_import_mapping,
+]
 
 __all__ = ["TOOLS"]
diff --git a/conda_meta_mcp/tools/import_mapping.py b/conda_meta_mcp/tools/import_mapping.py
@@ -0,0 +1,101 @@
+"""
+import_mapping tool
+
+This tool is based on (and wraps) logic from:
+`conda_forge_metadata.autotick_bot.import_to_pkg`
+"""
+
+from __future__ import annotations
+
+import asyncio
+from functools import lru_cache
+from typing import TYPE_CHECKING
+
+from fastmcp.exceptions import ToolError
+
+if TYPE_CHECKING:
+    from fastmcp import FastMCP
+
+
+from conda_forge_metadata.autotick_bot.import_to_pkg import (
+    get_pkgs_for_import,
+    map_import_to_package,
+)
+
+
+@lru_cache(maxsize=1024)
+def _map_import(import_name: str) -> dict:
+    if not import_name or not import_name.strip():
+        raise ValueError("import_name must be a non-empty string")
+
+    query = import_name.strip()
+
+    # Underlying function truncates to top-level automatically.
+    candidates, normalized = get_pkgs_for_import(query)
+
+    if candidates is None or len(candidates) == 0:
+        # No mapping known; identity fallback.
+        return {
+            "query_import": query,
+            "normalized_import": normalized,
+            "best_package": normalized,
+            "candidate_packages": [],
+            "heuristic": "identity",
+        }
+
+    best = map_import_to_package(query)
+
+    if best == normalized and best in candidates:
+        heuristic = "identity_present"
+    elif best in candidates:
+        heuristic = "ranked_selection"
+    else:
+        heuristic = "fallback"
+
+    return {
+        "query_import": query,
+        "normalized_import": normalized,
+        "best_package": best,
+        "candidate_packages": sorted(candidates),
+        "heuristic": heuristic,
+    }
+
+
+def register_import_mapping(mcp: FastMCP) -> None:
+    """Register the ``import_mapping`` tool on the given FastMCP server.
+
+    The tool is defined as a nested coroutine and attached with ``@mcp.tool``;
+    nothing is returned.
+    """
+    @mcp.tool
+    async def import_mapping(import_name: str) -> dict:
+        """
+        Map a (possibly dotted) Python import name to the most likely conda package
+        and expose supporting context.
+
+        What this does:
+          - Normalizes the import to its top-level module (e.g. "numpy.linalg" -> "numpy")
+          - Retrieves an approximate candidate set of conda packages that may provide it
+          - Applies a heuristic to pick a single "best" package
+          - Returns a structured result with the decision rationale
+
+        Heuristic labels:
+          - identity:          No candidates known; fallback to normalized import
+          - identity_present:  Candidates exist AND the normalized import name is among them
+          - ranked_selection:  Best package chosen via ranked hubs authorities ordering
+          - fallback:          Best package not in candidates (unexpected edge case)
+
+        Returned dict schema:
+          {
+            "query_import":      original query string supplied by caller
+            "normalized_import": top-level portion used for lookup
+            "best_package":      chosen conda package name (may equal normalized_import)
+            "candidate_packages": sorted list of possible supplying packages (may be empty)
+            "heuristic":         one of the heuristic labels above
+          }
+
+        Args:
+          import_name:
+            Import string, e.g. "yaml", "matplotlib.pyplot", "sklearn.model_selection".
+        """
+        try:
+            # _map_import is synchronous; run it in a worker thread so the
+            # event loop is not blocked while the lookup executes.
+            return await asyncio.to_thread(_map_import, import_name)
+        except ValueError as ve:
+            # _map_import raises ValueError for empty/whitespace-only input.
+            # NOTE(review): a ValueError raised inside the upstream library
+            # would also be reported as "invalid input" here.
+            raise ToolError(f"'import_mapping' invalid input: {ve}") from ve
+        except Exception as e:  # pragma: no cover - generic protection
+            # Any other failure is surfaced to the client as a ToolError,
+            # with the original exception chained as the cause.
+            raise ToolError(f"'import_mapping' failed: {e}") from e
diff --git a/pixi.lock b/pixi.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -38,6 +38,7 @@ source = "vcs"
 [tool.pixi.dependencies]
 argparse-manpage = ">=4.7"
 conda = ">=25.7.0"
+conda-forge-metadata = "*"
 conda-package-streaming = ">=0.12.0"
 fastmcp = ">=2.11.3"
 libmambapy = ">=2.3.1"
diff --git a/server-info.json b/server-info.json
@@ -212,6 +212,33 @@
           "tags": []
         }
       }
+    },
+    {
+      "name": "import_mapping",
+      "title": null,
+      "description": "Map a (possibly dotted) Python import name to the most likely conda package\nand expose supporting context.\n\nWhat this does:\n  - Normalizes the import to its top-level module (e.g. \"numpy.linalg\" -> \"numpy\")\n  - Retrieves an approximate candidate set of conda packages that may provide it\n  - Applies a heuristic to pick a single \"best\" package\n  - Returns a structured result with the decision rationale\n\nHeuristic labels:\n  - identity:          No candidates known; fallback to normalized import\n  - identity_present:  Candidates exist AND the normalized import name is among them\n  - ranked_selection:  Best package chosen via ranked hubs authorities ordering\n  - fallback:          Best package not in candidates (unexpected edge case)\n\nReturned dict schema:\n  {\n    \"query_import\":      original query string supplied by caller\n    \"normalized_import\": top-level portion used for lookup\n    \"best_package\":      chosen conda package name (may equal normalized_import)\n    \"candidate_packages\": sorted list of possible supplying packages (may be empty)\n    \"heuristic\":         one of the heuristic labels above\n  }\n\nArgs:\n  import_name:\n    Import string, e.g. \"yaml\", \"matplotlib.pyplot\", \"sklearn.model_selection\".",
+      "inputSchema": {
+        "properties": {
+          "import_name": {
+            "title": "Import Name",
+            "type": "string"
+          }
+        },
+        "required": [
+          "import_name"
+        ],
+        "type": "object"
+      },
+      "outputSchema": {
+        "additionalProperties": true,
+        "type": "object"
+      },
+      "annotations": null,
+      "_meta": {
+        "_fastmcp": {
+          "tags": []
+        }
+      }
     }
   ],
   "prompts": [],
diff --git a/tests/tools/test_import_mapping.py b/tests/tools/test_import_mapping.py
@@ -0,0 +1,65 @@
+from __future__ import annotations
+
+import pytest
+from fastmcp import Client
+from fastmcp.exceptions import ToolError
+
+# Heuristic labels the tool may legitimately emit (the four values assigned in
+# conda_meta_mcp/tools/import_mapping.py). Keeping a central set makes the
+# success test resilient to minor internal mapping changes upstream.
+VALID_HEURISTICS = {
+    "identity",
+    "identity_present",
+    "ranked_selection",
+    "fallback",
+}
+
+
+@pytest.mark.asyncio
+async def test_import_mapping__success_basic(server):
+    """
+    Basic happy-path test: provide a dotted import and validate the response schema
+    and invariants. We intentionally do NOT assert an exact best_package value
+    (to stay resilient to upstream mapping evolution) but we enforce structural
+    correctness and heuristic membership.
+    """
+    async with Client(server) as client:
+        # Use a very common library import that should resolve deterministically.
+        result = await client.call_tool(
+            "import_mapping",
+            {
+                "import_name": "numpy.linalg",
+            },
+        )
+        data = result.data
+        # Schema keys
+        assert sorted(data.keys()) == [
+            "best_package",
+            "candidate_packages",
+            "heuristic",
+            "normalized_import",
+            "query_import",
+        ]
+        # Field relationships
+        assert data["query_import"] == "numpy.linalg"
+        assert data["normalized_import"] == "numpy"
+        assert isinstance(data["candidate_packages"], list)
+        assert all(isinstance(x, str) for x in data["candidate_packages"])
+        assert data["heuristic"] in VALID_HEURISTICS
+        assert isinstance(data["best_package"], str)
+
+
+@pytest.mark.asyncio
+async def test_import_mapping__error_on_empty_input(server):
+    """
+    Passing an empty string should surface a ToolError (input validation branch).
+    """
+    async with Client(server) as client:
+        with pytest.raises(ToolError) as exc:
+            await client.call_tool(
+                "import_mapping",
+                {
+                    "import_name": "",
+                },
+            )
+        # Sanity check on error message clarity
+        assert "invalid input" in str(exc.value).lower()
diff --git a/tests/tools/test_pkg_search.py b/tests/tools/test_pkg_search.py
@@ -17,10 +17,7 @@ def _is_sorted_newest_first(records: list[dict]) -> bool:
 
     def key(r):
         version = VersionOrder(r["version"])
-        try:
-            bn = int(r["build_number"])
-        except Exception:
-            bn = -1
+        bn = int(r["build_number"])
         return (version, bn)
 
     return all(key(prev) >= key(curr) for prev, curr in pairwise(records))