vectorize-io · nicoloboschi · Dec 18, 2025 · Dec 17, 2025
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -117,6 +117,47 @@ jobs:
         path: hindsight-clients/typescript/*.tgz
         retention-days: 1
 
+  release-control-plane:  # build the control plane, publish it to npm, upload its tarball
+    runs-on: ubuntu-latest
+    environment: npm  # job runs in the GitHub Actions environment named "npm"
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set up Node.js
+      uses: actions/setup-node@v4
+      with:
+        node-version: '20'
+        registry-url: 'https://registry.npmjs.org'
+        cache: 'npm'
+        cache-dependency-path: package-lock.json  # lockfile at the repository root
+
+    - name: Install dependencies
+      run: npm ci
+
+    - name: Build TypeScript client (dependency)  # built first; step name marks it as a dependency
+      run: npm run build --workspace=hindsight-clients/typescript
+
+    - name: Build
+      run: npm run build --workspace=hindsight-control-plane
+
+    - name: Publish to npm
+      working-directory: ./hindsight-control-plane
+      run: npm publish --access public
+      env:
+        NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}  # token taken from the NPM_TOKEN secret
+
+    - name: Pack for GitHub release  # creates the .tgz matched by the upload step's path
+      working-directory: ./hindsight-control-plane
+      run: npm pack
+
+    - name: Upload artifacts
+      uses: actions/upload-artifact@v4
+      with:
+        name: control-plane  # same name the "Download Control Plane" step requests
+        path: hindsight-control-plane/*.tgz
+        retention-days: 1
+
   release-rust-cli:
     runs-on: ${{ matrix.os }}
     strategy:
@@ -287,7 +328,7 @@ jobs:
 
   create-github-release:
     runs-on: ubuntu-latest
-    needs: [release-python-packages, release-typescript-client, release-rust-cli, release-docker-images, release-helm-chart]
+    needs: [release-python-packages, release-typescript-client, release-control-plane, release-rust-cli, release-docker-images, release-helm-chart]
     permissions:
       contents: write
 
@@ -310,6 +351,12 @@ jobs:
         name: typescript-client
         path: ./artifacts/typescript-client
 
+    - name: Download Control Plane
+      uses: actions/download-artifact@v4
+      with:
+        name: control-plane
+        path: ./artifacts/control-plane
+
     - name: Download Rust CLI (Linux)
       uses: actions/download-artifact@v4
       with:
@@ -344,6 +391,8 @@ jobs:
         cp artifacts/python-packages/hindsight-integrations/litellm/dist/* release-assets/ || true
         # TypeScript client
         cp artifacts/typescript-client/*.tgz release-assets/ || true
+        # Control Plane
+        cp artifacts/control-plane/*.tgz release-assets/ || true
         # Rust CLI binaries
         cp artifacts/rust-cli-linux/hindsight-linux-amd64 release-assets/ || true
         cp artifacts/rust-cli-darwin-amd64/hindsight-darwin-amd64 release-assets/ || true

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -80,6 +80,33 @@ jobs:
     - name: Build TypeScript client
       run: npm run build --workspace=hindsight-clients/typescript
 
+  build-control-plane:  # CI check: build the control plane and smoke-test the result
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set up Node.js
+      uses: actions/setup-node@v4
+      with:
+        node-version: '20'
+        cache: 'npm'
+        cache-dependency-path: package-lock.json  # lockfile at the repository root
+
+    - name: Install dependencies
+      run: npm ci
+
+    - name: Build TypeScript client (dependency)  # built first; step name marks it as a dependency
+      run: npm run build --workspace=hindsight-clients/typescript
+
+    - name: Build control plane
+      run: npm run build --workspace=hindsight-control-plane
+
+    - name: Verify standalone build  # fails if standalone/server.js is missing or the CLI --help call fails
+      run: |
+        test -f hindsight-control-plane/standalone/server.js || exit 1
+        node hindsight-control-plane/bin/cli.js --help
+
   build-docs:
     runs-on: ubuntu-latest
 

diff --git a/docker/standalone/Dockerfile b/docker/standalone/Dockerfile
@@ -72,30 +72,39 @@ FROM node:20-slim AS cp-builder
 ARG INCLUDE_CP
 RUN if [ "$INCLUDE_CP" != "true" ]; then echo "Skipping CP build" && exit 0; fi
 
-WORKDIR /app
-
-# Copy built SDK
-COPY --from=sdk-builder /app/hindsight-clients/typescript /app/sdk
+# Create directory structure matching the monorepo layout
+# This is required because build:standalone script expects .next/standalone/memory-poc/hindsight-control-plane
+WORKDIR /app/memory-poc/hindsight-control-plane
 
 # Install Control Plane dependencies
 # Only copy package.json (not package-lock.json) to ensure npm installs
 # correct platform-specific native bindings for lightningcss/tailwindcss
 COPY hindsight-control-plane/package.json ./
+# Remove the file: dependency on SDK (we'll copy it directly later)
+RUN sed -i '/"@vectorize-io\/hindsight-client":/d' package.json
 RUN npm install
 
 # Copy Control Plane source (excluding node_modules via .dockerignore)
 COPY hindsight-control-plane/ ./
 # Remove package-lock.json to avoid conflicts with installed native bindings
-RUN rm -f package-lock.json
+# Also remove the file: dependency from package.json (restored by COPY above)
+RUN rm -f package-lock.json && sed -i '/"@vectorize-io\/hindsight-client":/d' package.json
 
-# Link SDK (temporary for build)
-RUN cd /app/sdk && npm link && cd /app && npm link @vectorize-io/hindsight-client
+# Copy built SDK directly into node_modules (more reliable than npm link in Docker)
+COPY --from=sdk-builder /app/hindsight-clients/typescript ./node_modules/@vectorize-io/hindsight-client
 
-# Build Control Plane
-RUN npm run build
+# Build Control Plane - run next build first, then custom standalone copy
+# (The build:standalone script expects a specific path structure that differs in Docker)
+RUN npm exec -- next build
 
-# Create public directory if it doesn't exist
-RUN mkdir -p public
+# Assemble ./standalone. Fail on any error; find the app's server.js outside node_modules;
+# copy with "/." so the hidden .next dir is kept; ensure public/ exists for later COPYs.
+RUN set -e; mkdir -p public standalone/public standalone/.next/static; \
+    SERVER_JS=$(find .next/standalone -name server.js -not -path '*/node_modules/*' | head -n 1); \
+    test -n "$SERVER_JS" || { echo "server.js not found in .next/standalone" >&2; exit 1; }; \
+    cp -r "$(dirname "$SERVER_JS")"/. standalone/; \
+    cp -r .next/static/. standalone/.next/static/; \
+    cp -r public/. standalone/public/
 
 # =============================================================================
 # Stage: Final Image - API Only
@@ -172,9 +181,9 @@ COPY --from=sdk-builder /app/hindsight-clients/typescript /app/sdk
 
 # Copy Control Plane standalone build
 WORKDIR /app/control-plane
-COPY --from=cp-builder /app/.next/standalone ./
-COPY --from=cp-builder /app/.next/static ./.next/static
-COPY --from=cp-builder /app/public ./public
+COPY --from=cp-builder /app/memory-poc/hindsight-control-plane/standalone ./
+COPY --from=cp-builder /app/memory-poc/hindsight-control-plane/.next/static ./.next/static
+COPY --from=cp-builder /app/memory-poc/hindsight-control-plane/public ./public
 
 WORKDIR /app
 
@@ -226,9 +235,9 @@ COPY --from=sdk-builder /app/hindsight-clients/typescript /app/sdk
 
 # Copy Control Plane standalone build
 WORKDIR /app/control-plane
-COPY --from=cp-builder /app/.next/standalone ./
-COPY --from=cp-builder /app/.next/static ./.next/static
-COPY --from=cp-builder /app/public ./public
+COPY --from=cp-builder /app/memory-poc/hindsight-control-plane/standalone ./
+COPY --from=cp-builder /app/memory-poc/hindsight-control-plane/.next/static ./.next/static
+COPY --from=cp-builder /app/memory-poc/hindsight-control-plane/public ./public
 
 WORKDIR /app
 

diff --git a/hindsight-api/hindsight_api/config.py b/hindsight-api/hindsight_api/config.py
@@ -31,6 +31,7 @@
 ENV_MCP_ENABLED = "HINDSIGHT_API_MCP_ENABLED"
 ENV_GRAPH_RETRIEVER = "HINDSIGHT_API_GRAPH_RETRIEVER"
 ENV_MCP_LOCAL_BANK_ID = "HINDSIGHT_API_MCP_LOCAL_BANK_ID"
+ENV_MCP_INSTRUCTIONS = "HINDSIGHT_API_MCP_INSTRUCTIONS"
 
 # Default values
 DEFAULT_DATABASE_URL = "pg0"
@@ -50,6 +51,26 @@
 DEFAULT_GRAPH_RETRIEVER = "bfs"  # Options: "bfs", "mpfp"
 DEFAULT_MCP_LOCAL_BANK_ID = "mcp"
 
+# Default MCP tool descriptions (HINDSIGHT_API_MCP_INSTRUCTIONS text is appended to these)
+DEFAULT_MCP_RETAIN_DESCRIPTION = """Store important information to long-term memory.
+
+Use this tool PROACTIVELY whenever the user shares:
+- Personal facts, preferences, or interests
+- Important events or milestones
+- User history, experiences, or background
+- Decisions, opinions, or stated preferences
+- Goals, plans, or future intentions
+- Relationships or people mentioned
+- Work context, projects, or responsibilities"""
+
+DEFAULT_MCP_RECALL_DESCRIPTION = """Search memories to provide personalized, context-aware responses.
+
+Use this tool PROACTIVELY to:
+- Check user's preferences before making suggestions
+- Recall user's history to provide continuity
+- Remember user's goals and context
+- Personalize responses based on past interactions"""
+
 # Required embedding dimension for database schema
 EMBEDDING_DIMENSION = 384
 
@@ -142,7 +163,9 @@ def get_python_log_level(self) -> int:
     def configure_logging(self) -> None:
         """Configure Python logging based on the log level."""
         logging.basicConfig(
-            level=self.get_python_log_level(), format="%(asctime)s - %(levelname)s - %(name)s - %(message)s"
+            level=self.get_python_log_level(),
+            format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+            force=True,  # Override any existing configuration
         )
 
     def log_config(self) -> None:

diff --git a/hindsight-api/hindsight_api/engine/llm_wrapper.py b/hindsight-api/hindsight_api/engine/llm_wrapper.py
@@ -172,7 +172,7 @@ async def call(
 
             # Check if model supports reasoning parameter (o1, o3, gpt-5 families)
             model_lower = self.model.lower()
-            is_reasoning_model = any(x in model_lower for x in ["gpt-5", "o1", "o3"])
+            is_reasoning_model = any(x in model_lower for x in ["gpt-5", "o1", "o3", "deepseek"])
 
             # For GPT-4 and GPT-4.1 models, cap max_completion_tokens to 32000
             # For GPT-4o models, cap to 16384
@@ -194,7 +194,7 @@ async def call(
                 call_params["temperature"] = temperature
 
-            # Set reasoning_effort for reasoning models (OpenAI gpt-5, o1, o3)
+            # Set reasoning_effort for reasoning models on any provider (gpt-5, o1, o3, deepseek)
-            if is_reasoning_model and self.provider == "openai":
+            if is_reasoning_model:
                 call_params["reasoning_effort"] = self.reasoning_effort
 
             # Provider-specific parameters
@@ -203,7 +203,6 @@ async def call(
                 extra_body = {"service_tier": "auto"}
                 # Only add reasoning parameters for reasoning models
                 if is_reasoning_model:
-                    extra_body["reasoning_effort"] = self.reasoning_effort
                     extra_body["include_reasoning"] = False
                 call_params["extra_body"] = extra_body
 

diff --git a/hindsight-api/hindsight_api/engine/retain/orchestrator.py b/hindsight-api/hindsight_api/engine/retain/orchestrator.py
@@ -107,6 +107,10 @@ async def retain_batch(
     )
 
     if not extracted_facts:
+        total_time = time.time() - start_time
+        logger.info(
+            f"RETAIN_BATCH COMPLETE: 0 facts extracted from {len(contents)} contents in {total_time:.3f}s (nothing to store)"
+        )
         return [[] for _ in contents]
 
     # Apply fact_type_override if provided

diff --git a/hindsight-api/hindsight_api/main.py b/hindsight-api/hindsight_api/main.py
@@ -127,8 +127,10 @@ def main():
             port=args.port,
             log_level=args.log_level,
             mcp_enabled=config.mcp_enabled,
+            graph_retriever=config.graph_retriever,
         )
     config.configure_logging()
+    config.log_config()
 
     # Register cleanup handlers
     atexit.register(_cleanup)

diff --git a/hindsight-api/hindsight_api/mcp_local.py b/hindsight-api/hindsight_api/mcp_local.py
@@ -28,22 +28,35 @@
     HINDSIGHT_API_LLM_PROVIDER: Optional. LLM provider (default: "openai").
     HINDSIGHT_API_LLM_MODEL: Optional. LLM model (default: "gpt-4o-mini").
     HINDSIGHT_API_MCP_LOCAL_BANK_ID: Optional. Memory bank ID (default: "mcp").
-    HINDSIGHT_API_LOG_LEVEL: Optional. Log level (default: "info").
+    HINDSIGHT_API_LOG_LEVEL: Optional. Log level (default: "warning").
+    HINDSIGHT_API_MCP_INSTRUCTIONS: Optional. Additional instructions appended to both retain and recall tools.
+
+Example custom instructions (these are ADDED to the default behavior):
+    To also store assistant actions:
+        HINDSIGHT_API_MCP_INSTRUCTIONS="Also store every action you take, including tool calls, code written, and decisions made."
+
+    To also store conversation summaries:
+        HINDSIGHT_API_MCP_INSTRUCTIONS="Also store summaries of important conversations and their outcomes."
 """
 
 import logging
 import os
 import sys
 
 from mcp.server.fastmcp import FastMCP
+from mcp.types import Icon
 
 from hindsight_api.config import (
     DEFAULT_MCP_LOCAL_BANK_ID,
+    DEFAULT_MCP_RECALL_DESCRIPTION,
+    DEFAULT_MCP_RETAIN_DESCRIPTION,
+    ENV_MCP_INSTRUCTIONS,
     ENV_MCP_LOCAL_BANK_ID,
 )
 
-# Configure logging - default to info
-_log_level_str = os.environ.get("HINDSIGHT_API_LOG_LEVEL", "info").lower()
+# Configure logging - default to warning to avoid polluting stderr during MCP init
+# MCP clients interpret stderr output as errors, so we suppress INFO logs by default
+_log_level_str = os.environ.get("HINDSIGHT_API_LOG_LEVEL", "warning").lower()
 _log_level_map = {
     "critical": logging.CRITICAL,
     "error": logging.ERROR,
@@ -79,22 +92,21 @@ def create_local_mcp_server(bank_id: str, memory=None) -> FastMCP:
     if memory is None:
         memory = MemoryEngine(db_url="pg0://hindsight-mcp")
 
+    # Get custom instructions from environment variable (appended to both tools)
+    extra_instructions = os.environ.get(ENV_MCP_INSTRUCTIONS, "")
+
+    retain_description = DEFAULT_MCP_RETAIN_DESCRIPTION
+    recall_description = DEFAULT_MCP_RECALL_DESCRIPTION
+
+    if extra_instructions:
+        retain_description = f"{DEFAULT_MCP_RETAIN_DESCRIPTION}\n\nAdditional instructions: {extra_instructions}"
+        recall_description = f"{DEFAULT_MCP_RECALL_DESCRIPTION}\n\nAdditional instructions: {extra_instructions}"
+
     mcp = FastMCP("hindsight")
 
-    @mcp.tool()
+    @mcp.tool(description=retain_description)
     async def retain(content: str, context: str = "general") -> dict:
         """
-        Store important information to long-term memory.
-
-        Use this tool PROACTIVELY whenever the user shares:
-        - Personal facts, preferences, or interests
-        - Important events or milestones
-        - User history, experiences, or background
-        - Decisions, opinions, or stated preferences
-        - Goals, plans, or future intentions
-        - Relationships or people mentioned
-        - Work context, projects, or responsibilities
-
         Args:
             content: The fact/memory to store (be specific and include relevant details)
             context: Category for the memory (e.g., 'preferences', 'work', 'hobbies', 'family'). Default: 'general'
@@ -111,17 +123,9 @@ async def _retain():
         asyncio.create_task(_retain())
         return {"status": "accepted", "message": "Memory storage initiated"}
 
-    @mcp.tool()
+    @mcp.tool(description=recall_description)
     async def recall(query: str, max_tokens: int = 4096, budget: str = "low") -> dict:
         """
-        Search memories to provide personalized, context-aware responses.
-
-        Use this tool PROACTIVELY to:
-        - Check user's preferences before making suggestions
-        - Recall user's history to provide continuity
-        - Remember user's goals and context
-        - Personalize responses based on past interactions
-
         Args:
             query: Natural language search query (e.g., "user's food preferences", "what projects is user working on")
             max_tokens: Maximum tokens to return in results (default: 4096)
@@ -153,10 +157,9 @@ async def _initialize_and_run(bank_id: str):
     from hindsight_api import MemoryEngine
 
     # Create and initialize memory engine with pg0 embedded database
-    print("Initializing memory engine...", file=sys.stderr)
+    # Note: We avoid printing to stderr during init as MCP clients show it as "errors"
     memory = MemoryEngine(db_url="pg0://hindsight-mcp")
     await memory.initialize()
-    print("Memory engine initialized.", file=sys.stderr)
 
     # Create and run the server
     mcp = create_local_mcp_server(bank_id, memory=memory)
@@ -179,8 +182,8 @@ def main():
     # Get bank ID from environment, default to "mcp"
     bank_id = os.environ.get(ENV_MCP_LOCAL_BANK_ID, DEFAULT_MCP_LOCAL_BANK_ID)
 
-    # Print startup message to stderr (stdout is reserved for MCP protocol)
-    print(f"Hindsight MCP server starting (bank_id={bank_id})...", file=sys.stderr)
+    # Note: We don't print to stderr as MCP clients display it as "error output"
+    # Use HINDSIGHT_API_LOG_LEVEL=debug for verbose startup logging
 
     # Run the async initialization and server
     asyncio.run(_initialize_and_run(bank_id))