forked from Context-Engine-AI/Context-Engine
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathMakefile
More file actions
371 lines (288 loc) · 16.4 KB
/
Makefile
File metadata and controls
371 lines (288 loc) · 16.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
SHELL := /bin/bash
# Remove half-written targets (e.g. partially downloaded models) when a recipe fails,
# so a truncated file never looks "up to date" on the next run.
.DELETE_ON_ERROR:
# Avoid inheriting Docker context from shells/venvs (e.g., DOCKER_HOST=unix:///Users/...)
# An empty export forces docker to use its default context/socket.
export DOCKER_HOST =
# Every command-style target is phony so a same-named file can never shadow it.
.PHONY: help up down logs ps restart rebuild index reindex reindex-hard index-path index-here watch watch-remote env hybrid bootstrap history rerank rerank-local setup-reranker quantize-reranker prune prune-path warm health decoder-health test-e2e
.PHONY: venv venv-install dev-remote-up dev-remote-down dev-remote-logs dev-remote-restart dev-remote-bootstrap dev-remote-test dev-remote-client dev-remote-clean
.PHONY: up-nc restart-nc reset-dev reset-dev-codex reset-dev-dual dev-core llama-model llamacpp-up llamacpp-build-image tokenizer
.PHONY: rerank-eval rerank-eval-ablations rerank-benchmark route-plan route-run router-eval router-smoke ctx
.PHONY: qdrant-status qdrant-list qdrant-prune qdrant-index-root
venv: ## create local virtualenv .venv
	python3 -m venv .venv && . .venv/bin/activate && pip install -U pip

venv-install: ## install project dependencies into .venv
	# Create the venv first if it does not exist yet.
	if [ ! -d .venv ]; then $(MAKE) venv; fi
	. .venv/bin/activate && pip install -r requirements.txt
# Show available targets by scanning the `target: ## description` annotations.
# Uses $(MAKEFILE_LIST) instead of a hardcoded "Makefile" so it still works
# when invoked via `make -f` or from an including makefile.
help: ## show targets and their descriptions
	@grep -hE '^[a-zA-Z0-9_-]+:.*## ' $(MAKEFILE_LIST) | sed 's/:.*##/: /' | column -t

# Guard for required env vars: usage `make guard-VAR`
# Fails with a clear message when Make variable VAR is empty/undefined.
guard-%:
	@if [ -z "$($*)" ]; then echo "Missing env: $*"; exit 1; fi
# Basic docker compose lifecycle helpers.

up: ## docker compose up (build if needed)
	docker compose up -d --build

down: ## docker compose down
	docker compose down

logs: ## follow logs
	docker compose logs -f --tail=100

ps: ## show container status
	docker compose ps

restart: ## restart stack (rebuild)
	docker compose down && docker compose up -d --build

rebuild: ## rebuild images without cache
	docker compose build --no-cache
index: ## index code into Qdrant without dropping the collection
	docker compose run --rm indexer --root /work

reindex: ## recreate collection then index from scratch (will remove existing points!)
	docker compose run --rm indexer --root /work --recreate

reindex-hard: ## clear all caches (local + container) then recreate collection and index from scratch
	@echo "Clearing local caches..."
	@rm -f .codebase/cache.json || true
	@rm -rf .codebase/symbols || true
	@find dev-workspace -path "*/.codebase/cache.json" -delete 2>/dev/null || true
	@find dev-workspace -path "*/.codebase/symbols" -type d -exec rm -rf {} + 2>/dev/null || true
	@echo "Clearing container caches..."
	# Best-effort: a service that is not running is simply skipped.
	@for svc in indexer watcher mcp_indexer; do \
		docker compose exec -T $$svc sh -c 'find /work -path "*/.codebase/cache.json" -delete 2>/dev/null; find /work -path "*/.codebase/symbols" -type d -exec rm -rf {} + 2>/dev/null' 2>/dev/null || true; \
	done
	@echo "Reindexing..."
	docker compose run --rm indexer --root /work --recreate
# Index an arbitrary local path without cloning into this repo
index-path: ## index an arbitrary repo: make index-path REPO_PATH=/abs/path [RECREATE=1] [REPO_NAME=name] [COLLECTION=name]
	@if [ -z "$(REPO_PATH)" ]; then \
		echo "Usage: make index-path REPO_PATH=/abs/path [RECREATE=1] [REPO_NAME=name] [COLLECTION=name]"; exit 1; \
	fi
	# Default the repo name to the path's basename, and the collection to the repo name.
	@repo=$${REPO_NAME:-$$(basename "$(REPO_PATH)")}; \
	coll=$${COLLECTION:-$$repo}; \
	HOST_INDEX_PATH="$(REPO_PATH)" COLLECTION_NAME="$$coll" REPO_NAME="$$repo" \
	docker compose run --rm -v "$$PWD":/app:ro --entrypoint python indexer /app/scripts/ingest_code.py --root /work $${RECREATE:+--recreate}

# Index the current working directory quickly
index-here: ## index the current directory: make index-here [RECREATE=1] [REPO_NAME=name] [COLLECTION=name]
	@here=$$(pwd); \
	repo=$${REPO_NAME:-$$(basename "$$here")}; \
	coll=$${COLLECTION:-$$repo}; \
	HOST_INDEX_PATH="$$here" COLLECTION_NAME="$$coll" REPO_NAME="$$repo" \
	docker compose run --rm indexer --root /work $${RECREATE:+--recreate}
watch: ## watch mode: reindex changed files on save (Ctrl+C to stop)
	docker compose run --rm --entrypoint python indexer /work/scripts/watch_index.py

watch-remote: ## remote watch mode: upload delta bundles to remote server (Ctrl+C to stop)
	@echo "Starting remote watch mode..."
	@if [ -z "$(REMOTE_UPLOAD_ENDPOINT)" ]; then \
		echo "Error: REMOTE_UPLOAD_ENDPOINT is required"; \
		echo "Usage: make watch-remote REMOTE_UPLOAD_ENDPOINT=http://your-server:8080 [REMOTE_UPLOAD_MAX_RETRIES=3] [REMOTE_UPLOAD_TIMEOUT=30]"; \
		exit 1; \
	fi
	@echo "Remote upload endpoint: $(REMOTE_UPLOAD_ENDPOINT)"
	@echo "Max retries: $${REMOTE_UPLOAD_MAX_RETRIES:-3}"
	@echo "Timeout: $${REMOTE_UPLOAD_TIMEOUT:-30} seconds"
	# Quote the endpoint: URLs may contain ?, & or spaces that the shell
	# would otherwise glob/word-split.
	docker compose run --rm --entrypoint python \
		-e REMOTE_UPLOAD_ENABLED=1 \
		-e REMOTE_UPLOAD_ENDPOINT="$(REMOTE_UPLOAD_ENDPOINT)" \
		-e REMOTE_UPLOAD_MAX_RETRIES=$${REMOTE_UPLOAD_MAX_RETRIES:-3} \
		-e REMOTE_UPLOAD_TIMEOUT=$${REMOTE_UPLOAD_TIMEOUT:-30} \
		indexer /work/scripts/watch_index.py
rerank: ## multi-query re-ranker helper example
	docker compose run --rm --entrypoint python indexer /work/scripts/rerank_query.py \
		--query "chunk code by lines with overlap for indexing" \
		--query "function to split code into overlapping line chunks" \
		--language python --under /work/scripts --limit 5

warm: ## prime ANN/search caches with a few queries
	docker compose run --rm --entrypoint python indexer /work/scripts/warm_start.py --ef 256 --limit 3

health: ## run health checks for collection/model settings
	docker compose run --rm --entrypoint python indexer /work/scripts/health_check.py

# Check llama.cpp decoder health on localhost:8080 (200 OK expected)
# Informational only: always exits 0, prints OK or a warning.
decoder-health: ## ping llama.cpp server
	@url=$${LLAMACPP_HEALTH_URL:-http://localhost:8080}; \
	status=$$(curl -s -o /dev/null -w "%{http_code}" $$url); \
	echo "llamacpp @ $$url -> $$status"; \
	if [ "$$status" = "200" ]; then echo "OK"; else echo "WARN: non-200"; fi
env: ## create .env from example if missing
	if [ ! -f .env ]; then cp .env.example .env; fi

hybrid: ## hybrid search: dense + lexical RRF fuse (respects --language/--under/--kind)
	docker compose run --rm --entrypoint python indexer /work/scripts/hybrid_search.py \
		--query "chunk code by lines" --query "overlapping line chunks" --limit 8

bootstrap: env up ## one-shot: up -> wait -> index -> warm -> health
	./scripts/wait-for-qdrant.sh
	$(MAKE) reindex
	# Cache warming is best-effort; health check is the real gate.
	$(MAKE) warm || true
	$(MAKE) health

history: ## ingest Git history (messages + file lists)
	docker compose run --rm --entrypoint python indexer /work/scripts/ingest_history.py --max-commits 200
prune-path: ## prune a repo by path: make prune-path REPO_PATH=/abs/path
	@if [ -z "$(REPO_PATH)" ]; then \
		echo "Usage: make prune-path REPO_PATH=/abs/path"; exit 1; \
	fi
	HOST_INDEX_PATH="$(REPO_PATH)" PRUNE_ROOT=/work \
	docker compose run --rm --entrypoint python indexer /work/scripts/prune.py

rerank-local: ## local cross-encoder reranker (requires RERANKER_ONNX_PATH, RERANKER_TOKENIZER_PATH)
	@if [ -z "$(RERANKER_ONNX_PATH)" ] || [ -z "$(RERANKER_TOKENIZER_PATH)" ]; then \
		echo "RERANKER_ONNX_PATH and RERANKER_TOKENIZER_PATH must be set in .env"; exit 1; \
	fi
	docker compose run --rm --entrypoint python indexer /work/scripts/rerank_local.py --query "search symbols" --topk 50 --limit 12

setup-reranker: ## download ONNX reranker + tokenizer, update .env, then smoke-test
	@if [ -z "$(ONNX_URL)" ] || [ -z "$(TOKENIZER_URL)" ]; then \
		echo "Provide ONNX_URL and TOKENIZER_URL, e.g."; \
		echo " make setup-reranker ONNX_URL=https://.../model.onnx TOKENIZER_URL=https://.../tokenizer.json"; \
		exit 1; \
	fi
	# DEST defaults to ./models; smoke-test the reranker right after download.
	python3 scripts/setup_reranker.py --onnx-url "$(ONNX_URL)" --tokenizer-url "$(TOKENIZER_URL)" --dest "$(or $(DEST),models)" && \
	$(MAKE) rerank-local

prune: ## remove points for missing files or mismatched file_hash
	docker compose run --rm --entrypoint python indexer /work/scripts/prune.py

# Convenience: full no-cache rebuild and bring-up sequences
up-nc: ## up with full no-cache rebuild
	docker compose build --no-cache && docker compose up -d

restart-nc: ## down, no-cache rebuild, up
	docker compose down && docker compose build --no-cache && docker compose up -d
reset-dev: ## full dev reset: qdrant -> wait -> init payload -> reindex -> bring up services (incl. decoder)
	docker compose down || true
	docker compose build --no-cache indexer mcp mcp_indexer mcp_http mcp_indexer_http watcher llamacpp
	docker compose up -d qdrant
	./scripts/wait-for-qdrant.sh
	# Payload-index init is best-effort (idempotent on an existing collection).
	docker compose run --rm init_payload || true
	$(MAKE) tokenizer
	docker compose run --rm -e INDEX_MICRO_CHUNKS -e MAX_MICRO_CHUNKS_PER_FILE -e TOKENIZER_PATH -e TOKENIZER_URL indexer --root /work --recreate
	$(MAKE) llama-model
	docker compose up -d mcp mcp_indexer watcher llamacpp
	# Ensure watcher is up even if a prior step or manual bring-up omitted it
	docker compose up -d watcher
	docker compose ps

reset-dev-codex: ## bring up Qdrant + Streamable HTTP MCPs only (for OpenAI Codex RMCP)
	docker compose down || true
	docker compose build --no-cache indexer mcp_http mcp_indexer_http watcher llamacpp
	docker compose up -d qdrant
	./scripts/wait-for-qdrant.sh
	# Seed Qdrant and create a fresh index for Codex
	docker compose run --rm init_payload || true
	$(MAKE) tokenizer
	docker compose run --rm -e INDEX_MICRO_CHUNKS -e MAX_MICRO_CHUNKS_PER_FILE -e TOKENIZER_PATH -e TOKENIZER_URL indexer --root /work --recreate
	$(MAKE) llama-model
	docker compose up -d mcp_http mcp_indexer_http watcher llamacpp
	docker compose ps
quantize-reranker: ## Quantize reranker ONNX to INT8 (set RERANKER_ONNX_PATH, optional OUTPUT_ONNX_PATH)
	@[ -n "$(RERANKER_ONNX_PATH)" ] || { echo "Set RERANKER_ONNX_PATH to your ONNX file"; exit 1; }
	python3 scripts/quantize_reranker.py

reset-dev-dual: ## bring up BOTH legacy SSE and Streamable HTTP MCPs (dual-compat mode)
	docker compose down || true
	docker compose build --no-cache indexer mcp mcp_indexer mcp_http mcp_indexer_http watcher llamacpp upload_service
	docker compose up -d qdrant
	./scripts/wait-for-qdrant.sh
	$(MAKE) tokenizer
	# Index first (creates collection), then init_payload (creates indexes on existing collection)
	docker compose run --rm -e INDEX_MICRO_CHUNKS -e MAX_MICRO_CHUNKS_PER_FILE -e TOKENIZER_PATH -e TOKENIZER_URL indexer --root /work --recreate
	docker compose run --rm init_payload || true
	$(MAKE) llama-model
	docker compose up -d mcp mcp_indexer mcp_http mcp_indexer_http watcher llamacpp upload_service
	docker compose up -d watcher
	docker compose ps

dev-core: ## core dev stack including uploader (alias for reset-dev-dual)
	$(MAKE) reset-dev-dual
# --- llama.cpp tiny model provisioning ---
LLAMACPP_MODEL_URL ?= https://huggingface.co/ibm-granite/granite-4.0-micro-GGUF/resolve/main/granite-4.0-micro-Q4_K_M.gguf
LLAMACPP_MODEL_PATH ?= models/model.gguf

# Skip the download when the file is already present: `curl --fail -C -` on a
# fully downloaded file gets HTTP 416 and exits non-zero, which used to make
# every re-run of this target fail after a successful first download.
llama-model: ## download tiny GGUF model into ./models/model.gguf (override with LLAMACPP_MODEL_URL/LLAMACPP_MODEL_PATH)
	@mkdir -p $(dir $(LLAMACPP_MODEL_PATH))
	@if [ -f "$(LLAMACPP_MODEL_PATH)" ]; then \
		echo "Model already present: $(LLAMACPP_MODEL_PATH) (delete it to force a re-download)"; \
	else \
		echo "Downloading: $(LLAMACPP_MODEL_URL) -> $(LLAMACPP_MODEL_PATH)"; \
		curl -L --fail --retry 3 -C - "$(LLAMACPP_MODEL_URL)" -o "$(LLAMACPP_MODEL_PATH)"; \
	fi

llamacpp-up: llama-model ## fetch tiny model (if missing) and start llama.cpp sidecar
	docker compose up -d llamacpp && sleep 2 && curl -sI http://localhost:8080 | head -n1 || true

# Optional: build a custom image that bakes the model into the image (no host volume needed)
llamacpp-build-image: ## build custom llama.cpp image with baked model (override LLAMACPP_MODEL_URL)
	docker build -f Dockerfile.llamacpp --build-arg MODEL_URL="$(LLAMACPP_MODEL_URL)" -t context-llamacpp:tiny .
# Download a tokenizer.json for micro-chunking (default: BAAI/bge-base-en-v1.5)
TOKENIZER_URL ?= https://huggingface.co/BAAI/bge-base-en-v1.5/resolve/main/tokenizer.json
TOKENIZER_PATH ?= models/tokenizer.json

# Skip the download when the file already exists: `curl --fail -C -` on a
# complete file gets HTTP 416 and exits non-zero, failing repeat runs.
tokenizer: ## download tokenizer.json to models/tokenizer.json (override with TOKENIZER_URL/TOKENIZER_PATH)
	@mkdir -p $(dir $(TOKENIZER_PATH))
	@if [ -f "$(TOKENIZER_PATH)" ]; then \
		echo "Tokenizer already present: $(TOKENIZER_PATH) (delete it to force a re-download)"; \
	else \
		echo "Downloading: $(TOKENIZER_URL) -> $(TOKENIZER_PATH)"; \
		curl -L --fail --retry 3 -C - "$(TOKENIZER_URL)" -o "$(TOKENIZER_PATH)"; \
	fi
# --- Development Remote Upload System Targets ---
dev-remote-up: ## start dev-remote stack with upload service
	@echo "Starting development remote upload system..."
	@mkdir -p dev-workspace/.codebase
	docker compose -f docker-compose.yml up -d --build

dev-remote-down: ## stop dev-remote stack
	@echo "Stopping development remote upload system..."
	docker compose -f docker-compose.yml down

dev-remote-logs: ## follow logs for dev-remote stack
	docker compose -f docker-compose.yml logs -f --tail=100

dev-remote-restart: ## restart dev-remote stack (rebuild)
	docker compose -f docker-compose.yml down && docker compose -f docker-compose.yml up -d --build

dev-remote-bootstrap: env dev-remote-up ## bootstrap dev-remote: up -> wait -> init -> index -> warm
	@echo "Bootstrapping development remote upload system..."
	./scripts/wait-for-qdrant.sh
	docker compose -f docker-compose.yml run --rm init_payload || true
	$(MAKE) tokenizer
	docker compose -f docker-compose.yml run --rm indexer --root /work --recreate
	$(MAKE) warm || true
	$(MAKE) health

# Prints manual verification commands only; does not run them.
dev-remote-test: ## test remote upload workflow
	@echo "Testing remote upload workflow..."
	@echo "Upload service should be accessible at http://localhost:8004"
	@echo "Health check: curl http://localhost:8004/health"
	@echo "Status check: curl 'http://localhost:8004/api/v1/delta/status?workspace_path=/work/test-repo'"
	@echo "Test upload: curl -X POST -F 'bundle=@test-bundle.tar.gz' -F 'workspace_path=/work/test-repo' http://localhost:8004/api/v1/delta/upload"

dev-remote-client: ## start remote upload client for testing
	@echo "Starting remote upload client..."
	docker compose -f docker-compose.yml --profile client up -d remote_upload_client

dev-remote-clean: ## clean up dev-remote volumes and containers
	@echo "Cleaning up development remote upload system..."
	docker compose -f docker-compose.yml down -v
	docker volume rm context-engine_shared_workspace context-engine_shared_codebase context-engine_upload_temp context-engine_qdrant_storage_dev_remote 2>/dev/null || true
	rm -rf dev-workspace
# Router helpers
# Q is the query string; override per-invocation: make route-plan Q="..."
Q ?= what is hybrid search?

route-plan: ## plan-only route for a query: make route-plan Q="your question"
	python3 scripts/mcp_router.py --plan "$(Q)"

route-run: ## execute routed tool(s) over HTTP: make route-run Q="your question"
	python3 scripts/mcp_router.py --run "$(Q)"

router-eval: ## run the mock-based router eval harness
	python3 scripts/router_eval.py
# Live orchestration smoke test (no CI): bring up stack, reindex, run router.
# The whole recipe is one shell invocation (backslash-continued) so `set -e`
# aborts the sequence on the first failure.
router-smoke: ## spin up compose, reindex, store a memory via router, then answer; exits nonzero on failure
	set -e; \
	docker compose down || true; \
	docker compose up -d qdrant; \
	./scripts/wait-for-qdrant.sh; \
	$(MAKE) llama-model; \
	docker compose up -d mcp_http mcp_indexer_http llamacpp; \
	echo "Waiting for MCP HTTP health..."; \
	for i in $$(seq 1 30); do \
		code1=$$(curl -s -o /dev/null -w "%{http_code}" http://localhost:$${FASTMCP_HTTP_HEALTH_PORT:-18002}/readyz || true); \
		code2=$$(curl -s -o /dev/null -w "%{http_code}" http://localhost:$${FASTMCP_INDEXER_HTTP_HEALTH_PORT:-18003}/readyz || true); \
		if [ "$$code1" = "200" ] && [ "$$code2" = "200" ]; then echo "MCP HTTP ready"; break; fi; \
		if [ $$i -eq 30 ]; then echo "MCP HTTP health timeout"; exit 1; fi; \
		sleep 1; \
	done; \
	$(MAKE) reindex; \
	echo "Storing a smoke memory via router..."; \
	python3 scripts/mcp_router.py --run "remember this: router smoke memory"; \
	echo "Running a router answer..."; \
	python3 scripts/mcp_router.py --run "recap our architecture decisions for the indexer"; \
	echo "router-smoke: PASS"
# Qdrant via MCP router convenience targets.
# `## ` annotations added so these show up in `make help` like every other target.
qdrant-status: ## show Qdrant status via the MCP router
	python3 scripts/mcp_router.py --run "status"

qdrant-list: ## list Qdrant collections via the MCP router
	python3 scripts/mcp_router.py --run "list collections"

qdrant-prune: ## prune stale points via the MCP router
	python3 scripts/mcp_router.py --run "prune"

qdrant-index-root: ## reindex the repo root via the MCP router
	python3 scripts/mcp_router.py --run "reindex repo"
# --- ctx CLI helper ---
# Usage examples (default prints ONLY the improved prompt):
#   make ctx Q="how does hybrid search work?"
#   make ctx Q="explain caching" ARGS="--language python --under scripts/"
# To include Supporting Context:
#   make ctx Q="explain caching" ARGS="--with-context --limit 2"
ctx: ## enhance a prompt with repo context: make ctx Q="your question" [ARGS='--language python --under scripts/ --with-context']
	@if [ -z "$(Q)" ]; then \
		echo 'Usage: make ctx Q="your question" [ARGS="--language python --under scripts/ --with-context"]'; \
		exit 1; \
	fi; \
	python3 scripts/ctx.py "$(Q)" $(ARGS)
# --- Reranker Evaluation ---
rerank-eval: ## run offline reranker evaluation (fixed queries, MRR/Recall/latency)
	python3 scripts/rerank_eval.py --output rerank_eval_results.json

rerank-eval-ablations: ## run full ablation study (baseline, recursive, learning, onnx)
	python3 scripts/rerank_eval.py --ablations --output rerank_eval_ablations.json

rerank-benchmark: ## run production benchmark on real codebase
	python3 scripts/rerank_real_benchmark.py