aietal · IvyX79 · May 16, 2026
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -41,6 +41,21 @@ services:
     networks:
       - net
 
+  rag-engine:  # Sci-RAG backend image, built from ./rag-engine/Dockerfile
+    build:
+      context: ./rag-engine
+      dockerfile: Dockerfile
+    ports:
+      - '8001:8000'  # host 8001 -> container 8000; quoted so the mapping is always read as a string
+    restart: on-failure
+    environment:
+      - 'LLM_HOST=http://aimengpt-api:8000'
+      - 'CHROMA_HOST=http://chroma-server:8000'  # NOTE(review): no chroma-server service is visible in this diff — confirm it exists on `net`
+    volumes:
+      - rag_data:/app/data  # named volume declared under the top-level `volumes:` key
+    networks:
+      - net
+
   aimengpt-ui:
     build:
       context: ./ui 
@@ -49,8 +64,9 @@ services:
       - 3000:3000
     restart: on-failure
     environment:
-      - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
+      - 'OPENAI_API_KEY=sk-XXX...XXXX'
       - 'OPENAI_API_HOST=http://aimengpt-api:8000'
+      - 'RAG_ENGINE_HOST=http://rag-engine:8000'
       - 'DEFAULT_MODEL=/models/${MODEL_NAME:-llama-2-7b-chat.bin}'
       - 'WAIT_HOSTS=aimengpt-api:8000'
       - 'WAIT_TIMEOUT=${WAIT_TIMEOUT:-3600}'
@@ -62,3 +78,5 @@ volumes:
     driver: local
   backups:
     driver: local
+  rag_data:
+    driver: local
diff --git a/rag-engine/Dockerfile b/rag-engine/Dockerfile
@@ -0,0 +1,37 @@
+# Sci-RAG Engine — Backend Service for AimenGPT
+#
+# Provides Llama Index-powered RAG with Semantic Scholar integration,
+# citation tracking, and AI document access.
+#
+# Build:  docker build -t rag-engine -f Dockerfile .
+# Run:    docker run -p 8000:8000 rag-engine
+
+FROM python:3.11-slim
+
+WORKDIR /app
+
+# Install build-essential (compiler toolchain); presumably needed by Python packages that build native extensions
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy requirements first so the dependency layer stays cached until requirements.txt changes
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy application code
+COPY src/ ./src/
+COPY config/ ./config/
+
+# Create data directories under /app (docker-compose mounts the rag_data volume at /app/data)
+RUN mkdir -p data/documents data/uploads data/chroma_db
+
+# Document the API port (uvicorn binds it in CMD below)
+EXPOSE 8000
+
+# Health check: stdlib urlopen raises on connection errors and on 4xx/5xx, so a failing /health marks the container unhealthy
+HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
+    CMD python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health', timeout=5)" || exit 1
+
+# Run the server. NOTE(review): hard-codes 2 workers while config/settings.yaml sets server.workers: 4 — confirm which is intended
+CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "2"]
diff --git a/rag-engine/README.md b/rag-engine/README.md
@@ -0,0 +1,28 @@
+# Sci-RAG Engine for AimenGPT
+
+A production-ready Llama Index-powered RAG backend that replaces the existing Chroma-only retrieval with a full scientific document understanding pipeline.
+
+## What it adds
+
+- **Llama Index** — Hierarchical document parsing and retrieval (as requested in the bounty)
+- **Semantic Scholar + arXiv** — Search and import external references alongside uploaded documents
+- **Smart Citations** — Every answer cites sources with confidence scores
+- **Document Unification** — Uploaded PDFs and external references in one searchable index
+- **Secure AI Access** — Token-based document access for AI agents
+
+## Endpoints
+
+| Endpoint | Description |
+|----------|-------------|
+| `POST /query` | Ask a question, get answer + citations |
+| `POST /documents/upload` | Upload a PDF, DOCX, TXT, MD, or TEX file |
+| `GET /documents` | List all indexed documents |
+| `POST /references/search` | Search Semantic Scholar + arXiv |
+| `POST /references/import` | Import a paper as a document |
+| `GET /health` | Service health |
+
+## How it replaces the existing flow
+
+**Before:** Frontend API routes → ChromaDB directly → raw chunks → LLM
+
+**After:** Frontend API routes → **rag-engine** (Llama Index + citations) → ChromaDB → LLM
diff --git a/rag-engine/config/settings.yaml b/rag-engine/config/settings.yaml
@@ -0,0 +1,47 @@
+# Sci-RAG Pipeline Configuration: LLM, embeddings, vector store, ingestion, citation, caching, and server settings
+
+llm:
+  provider: openrouter
+  model: deepseek/deepseek-v4-flash
+  temperature: 0.1
+  max_tokens: 4096
+
+embeddings:
+  provider: huggingface
+  model: sentence-transformers/all-MiniLM-L6-v2
+  dimension: 384  # docs/architecture.md documents all-MiniLM-L6-v2 as 384-dim; keep in sync with `model`
+  batch_size: 32
+
+vector_store:
+  type: chroma
+  persist_directory: data/chroma_db  # relative path; NOTE(review): docker-compose also passes CHROMA_HOST — confirm which one the code uses
+  collection_name: scientific_docs
+
+document_manager:
+  upload_dir: data/uploads
+  allowed_extensions: [.pdf, .docx, .txt, .md, .tex]
+  chunk_size: 1024  # units (tokens vs. characters) are not shown here — TODO confirm
+  chunk_overlap: 200  # presumably the same units as chunk_size — TODO confirm
+  max_document_size_mb: 50
+
+semantic_scholar:
+  api_base: https://api.semanticscholar.org/v1  # NOTE(review): the current Academic Graph API is served under /graph/v1 — confirm the client expects this base
+  max_results: 10
+  cache_ttl_hours: 24
+
+citation:
+  min_confidence: 0.6
+  max_sources_per_claim: 5
+  include_confidence: true
+  style: inline
+
+performance:
+  cache_enabled: true
+  cache_ttl_seconds: 3600
+  async_mode: true
+  max_concurrent_requests: 10
+
+server:
+  host: 0.0.0.0
+  port: 8000  # same port the Dockerfile EXPOSEs and passes to uvicorn
+  workers: 4  # NOTE(review): the Dockerfile CMD passes --workers 2 on the command line — confirm whether this value is read
diff --git a/rag-engine/docs/architecture.md b/rag-engine/docs/architecture.md
@@ -0,0 +1,158 @@
+# Sci-RAG Pipeline — Architecture Document
+
+## System Overview
+
+The Sci-RAG Pipeline is a production-ready Retrieval-Augmented Generation system designed specifically for scientific and research workflows. It unifies uploaded documents and Semantic Scholar references into a single queryable knowledge base, with proper citation tracking and AI-native access patterns.
+
+## Core Architecture
+
+```
+┌─────────────────────────────────────────────────────────────────────┐
+│                        FastAPI Server (src/main.py)                  │
+│  ┌─────────────┐  ┌──────────────┐  ┌──────────────┐  ┌─────────┐  │
+│  │ /query      │  │ /documents/* │  │ /references  │  │ /access │  │
+│  └──────┬──────┘  └──────┬───────┘  └──────┬───────┘  └────┬────┘  │
+└─────────┼────────────────┼─────────────────┼───────────────┼────────┘
+          │                │                 │               │
+┌─────────▼────────────────▼─────────────────▼───────────────▼────────┐
+│                      Orchestration Layer                             │
+│  Routes requests to appropriate components, handles errors          │
+└─────────────────────────────────────────────────────────────────────┘
+          │                │                 │               │
+┌─────────▼─────────┐ ┌──▼──────────────────▼──┐ ┌──────────▼────────┐
+│  DocumentManager  │ │   RAG Pipeline Core     │ │  AIAccessLayer   │
+│                   │ │   (Llama Index)         │ │                  │
+│ • File ingestion  │ │ • VectorStoreIndex      │ │ • Token auth     │
+│ • SS/arXiv search │ │ • RetrieverQueryEngine  │ │ • Permission     │
+│ • Deduplication   │ │ • Node parsing          │ │ • Rate limiting  │
+│ • Manifest store  │ │ • Similarity postproc   │ │ • Audit logging  │
+└───────────────────┘ └──────────┬──────────────┘ └──────────────────┘
+                                 │
+                    ┌────────────▼────────────┐
+                    │    CitationEngine        │
+                    │                         │
+                    │ • Source tracking       │
+                    │ • Confidence scoring    │
+                    │ • Inline/footnote fmt   │
+                    │ • Validation            │
+                    └─────────────────────────┘
+```
+
+## Data Flow
+
+### Query Flow
+```
+User/AI → POST /query {"question": "..."}
+    → RAGPipeline.query()
+        → VectorIndexRetriever (top_k docs)
+        → LLM synthesis with source context
+        → CitationEngine.record_claim()
+    → Response with answer + citations
+```
+
+### Document Ingestion Flow
+```
+Upload → POST /documents/upload
+    → File saved to data/uploads/
+    → DocumentManager.ingest_uploaded_file()
+        → Text extraction (PDF/DOCX/TXT/MD/TEX)
+        → Deduplication by content hash
+        → Manifest persistence
+    → RAGPipeline.refresh_index()
+        → Rebuild VectorStoreIndex
+```
+
+### Reference Import Flow
+```
+Search → POST /references/search?query="..."
+    → DocumentManager.search_semantic_scholar()
+    → DocumentManager.search_arxiv()
+    → Returns structured paper list
+
+Import → POST /references/import
+    → DocumentManager.import_from_semantic_scholar()
+    → Refresh index
+```
+
+## Component Details
+
+### DocumentManager
+- **Purpose**: Unified document lifecycle management
+- **Storage**: JSON manifest + file system for uploaded files
+- **Deduplication**: MD5 content hash
+- **Sources**: Uploaded files (PDF, DOCX, TXT, MD, TEX), Semantic Scholar API, arXiv API
+- **Key methods**: `add_document()`, `ingest_uploaded_file()`, `search_semantic_scholar()`, `search_arxiv()`
+
+### RAG Pipeline (Llama Index)
+- **Index**: `VectorStoreIndex` with ChromaDB persistence
+- **Embeddings**: HuggingFace `all-MiniLM-L6-v2` (384-dim)
+- **LLM**: OpenRouter (configurable model) via `OpenRouter` LLM class
+- **Retrieval**: `VectorIndexRetriever` with configurable `top_k`
+- **Post-processing**: `SimilarityPostprocessor` (0.5 cutoff)
+- **Query Engine**: `RetrieverQueryEngine` with synthesized responses
+
+### CitationEngine
+- **Tracking**: Every query records all sources used
+- **Confidence**: The aggregate confidence of an answer is the highest relevance score among its individual sources
+- **Formats**: Inline (text markers), footnote, session report
+- **Validation**: Cross-checks citations against document store
+- **Deduplication**: Registry of unique sources across session
+
+### AIAccessLayer
+- **Authentication**: Token-based (UUID v4)
+- **Permission Levels**: READ_ONLY, READ_QUERY, FULL_ACCESS
+- **Rate Limiting**: 30 requests per 60-second window
+- **Audit**: Full activity log with timestamps, actions, status
+- **Token Controls**: Expiration time, max query count, revocation
+
+## Configuration
+
+See `config/settings.yaml` for all configurable parameters. Key settings:
+
+| Setting | Default | Description |
+|---------|---------|-------------|
+| `llm.model` | deepseek/deepseek-v4-flash | LLM for answer synthesis |
+| `embeddings.model` | all-MiniLM-L6-v2 | Text embedding model |
+| `vector_store.type` | chroma | Vector database backend |
+| `document_manager.chunk_size` | 1024 | Document chunk size |
+| `citation.min_confidence` | 0.6 | Minimum citation confidence |
+| `server.port` | 8000 | API server port |
+
+## Deployment
+
+### Production
+```bash
+# Install dependencies
+pip install -r requirements.txt
+
+# Run with uvicorn
+uvicorn src.main:app --host 0.0.0.0 --port 8000 --workers 4
+
+# Or directly
+python src/main.py
+```
+
+### Test
+```bash
+# Run tests
+pytest tests/ -v
+```
+
+## Security Considerations
+
+1. **Access tokens** — All AI-agent interactions require tokens with explicit permission levels
+2. **Rate limiting** — Prevents abuse of the query endpoint
+3. **Audit logging** — All access is logged for review
+4. **File validation** — Only allowed extensions are processed; max file size enforced
+5. **CORS** — Permissive by default; restrict the allowed origins before deploying to production
+
+## Extensibility
+
+The pipeline is designed for component swapping:
+
+| Component | Default | Alternatives |
+|-----------|---------|--------------|
+| LLM | OpenRouter | OpenAI, Anthropic, local (Ollama) |
+| Embeddings | HuggingFace MiniLM | OpenAI Embeddings, Cohere |
+| Vector Store | ChromaDB | Pinecone, Weaviate, Qdrant, Simple |
+| Document Store | JSON manifest | SQLite, PostgreSQL, S3 |
diff --git a/rag-engine/requirements.txt b/rag-engine/requirements.txt
@@ -0,0 +1,32 @@
+# Sci-RAG Pipeline Dependencies
+
+# Core
+llama-index>=0.12.0
+llama-index-core>=0.12.0
+llama-index-llms-openrouter
+llama-index-embeddings-huggingface
+llama-index-vector-stores-chroma
+llama-index-readers-file
+
+# Document Processing
+pypdf>=4.0
+python-docx>=1.1.0
+
+# Vector Store
+chromadb>=0.5.0
+
+# Embeddings
+sentence-transformers>=2.2
+
+# External References
+arxiv>=2.0
+aiohttp>=3.9
+
+# API Server
+fastapi>=0.109
+uvicorn[standard]>=0.29
+pydantic>=2.0
+python-multipart>=0.0.9  # FastAPI needs this to parse multipart/form-data uploads (POST /documents/upload); drop if that route takes no form/file parameters
+
+# Utilities
+pyyaml>=6.0
diff --git a/rag-engine/src/__init__.py b/rag-engine/src/__init__.py
@@ -0,0 +1 @@
+"""Sci-RAG Engine — Backend RAG service for AimenGPT."""
diff --git a/rag-engine/src/__pycache__/__init__.cpython-313.pyc b/rag-engine/src/__pycache__/__init__.cpython-313.pyc
diff --git a/rag-engine/src/__pycache__/ai_access_layer.cpython-313.pyc b/rag-engine/src/__pycache__/ai_access_layer.cpython-313.pyc
diff --git a/rag-engine/src/__pycache__/citation_engine.cpython-313.pyc b/rag-engine/src/__pycache__/citation_engine.cpython-313.pyc
diff --git a/rag-engine/src/__pycache__/config.cpython-313.pyc b/rag-engine/src/__pycache__/config.cpython-313.pyc
diff --git a/rag-engine/src/__pycache__/document_manager.cpython-313.pyc b/rag-engine/src/__pycache__/document_manager.cpython-313.pyc
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		"""Sci-RAG Engine — Backend RAG service for AimenGPT."""