- This will delete all your account data, including conversations, agents, and any assets you{"'"}ve generated. Be sure to export before you do this if you want to keep your information.
+ This will delete all your account data,
+ including conversations, agents, and any
+ assets you{"'"}ve generated. Be sure to
+ export before you do this if you want to
+ keep your information.
@@ -1261,36 +1318,56 @@ export default function SettingsView() {
- Are you absolutely sure?
+
+ Are you absolutely sure?
+
- This action is irreversible. This will permanently delete your account
- and remove all your data from our servers.
+ This action is irreversible.
+ This will permanently delete
+ your account and remove all your
+ data from our servers.
- Cancel
+
+ Cancel
+ {
try {
- const response = await fetch('/api/self', {
- method: 'DELETE'
- });
- if (!response.ok) throw new Error('Failed to delete account');
+ const response =
+ await fetch(
+ "/api/self",
+ {
+ method: "DELETE",
+ },
+ );
+ if (!response.ok)
+ throw new Error(
+ "Failed to delete account",
+ );
toast({
title: "Account Deleted",
- description: "Your account has been successfully deleted.",
+ description:
+ "Your account has been successfully deleted.",
});
// Redirect to home page after successful deletion
- window.location.href = "/";
+ window.location.href =
+ "/";
} catch (error) {
- console.error('Error deleting account:', error);
+ console.error(
+ "Error deleting account:",
+ error,
+ );
toast({
title: "Error",
- description: "Failed to delete account. Please try again or contact support.",
- variant: "destructive"
+ description:
+ "Failed to delete account. Please try again or contact support.",
+ variant:
+ "destructive",
});
}
}}
diff --git a/src/khoj/configure.py b/src/khoj/configure.py
index 40d61a888..647a9fd25 100644
--- a/src/khoj/configure.py
+++ b/src/khoj/configure.py
@@ -314,6 +314,7 @@ def configure_routes(app):
from khoj.routers.api_agents import api_agents
from khoj.routers.api_chat import api_chat
from khoj.routers.api_content import api_content
+ from khoj.routers.api_github import github_router
from khoj.routers.api_model import api_model
from khoj.routers.notion import notion_router
from khoj.routers.web_client import web_client
@@ -323,6 +324,7 @@ def configure_routes(app):
app.include_router(api_agents, prefix="/api/agents")
app.include_router(api_model, prefix="/api/model")
app.include_router(api_content, prefix="/api/content")
+ app.include_router(github_router, prefix="/api/github")
app.include_router(notion_router, prefix="/api/notion")
app.include_router(web_client)
diff --git a/src/khoj/processor/content/github/github_to_entries.py b/src/khoj/processor/content/github/github_to_entries.py
index 31f99f844..dadab0abe 100644
--- a/src/khoj/processor/content/github/github_to_entries.py
+++ b/src/khoj/processor/content/github/github_to_entries.py
@@ -1,10 +1,12 @@
import logging
+import re
import time
from typing import Dict, List, Tuple
import requests
from magika import Magika
+from khoj.database.adapters import EntryAdapters
from khoj.database.models import Entry as DbEntry
from khoj.database.models import GithubConfig, KhojUser
from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries
@@ -54,11 +56,15 @@ def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = Fals
logger.warning(
f"Github PAT token is not set. Private repositories cannot be indexed and lower rate limits apply."
)
+
+ if user:
+ self.resync_github_entries(user)
+
current_entries = []
for repo in self.config.repos:
current_entries += self.process_repo(repo)
- return self.update_entries_with_ids(current_entries, user=user)
+ return self.update_entries_with_ids(current_entries, user=user, regenerate=regenerate)
def process_repo(self, repo: GithubRepoConfig):
repo_url = f"https://api.github.com/repos/{repo.owner}/{repo.name}"
@@ -99,7 +105,7 @@ def process_repo(self, repo: GithubRepoConfig):
return current_entries
- def update_entries_with_ids(self, current_entries, user: KhojUser = None):
+ def update_entries_with_ids(self, current_entries, user: KhojUser = None, regenerate: bool = False):
# Identify, mark and merge any new entries with previous entries
with timer("Identify new or updated entries", logger):
num_new_embeddings, num_deleted_embeddings = self.update_embeddings(
@@ -109,10 +115,48 @@ def update_entries_with_ids(self, current_entries, user: KhojUser = None):
DbEntry.EntrySource.GITHUB,
key="compiled",
logger=logger,
+ regenerate=regenerate,
)
return num_new_embeddings, num_deleted_embeddings
+    def resync_github_entries(self, user: KhojUser = None) -> None:
+        """
+        Delete indexed GitHub entries whose repository is no longer selected.
+
+        Loads the user's GithubConfig with its selected repos, fetches the file
+        names of the user's "github" entries and deletes every entry whose
+        `{owner}/{name}`, parsed from the file name, is not a selected repo.
+        Does not add or update entries; `process()` does that after calling this.
+
+        Does nothing when the user has no GithubConfig or no GitHub files.
+        """
+        config = GithubConfig.objects.filter(user=user).prefetch_related("githubrepoconfig").first()
+        if not config:
+            return
+
+        # Fetch all GitHub entry file names for the user
+        files = EntryAdapters.get_all_filenames_by_source(user, "github")
+        if not files:
+            return
+
+        # Use a set so the membership check in the loop below is O(1)
+        selected_repos = {f"{repo.owner}/{repo.name}" for repo in config.githubrepoconfig.all()}
+
+        # File names are expected to look like https://github.com/{owner}/{name}/blob/...
+        repo_pattern = re.compile(r"github\.com/([^/]+)/([^/]+)")
+        for file in files:
+            match = repo_pattern.search(file)
+            if not match:
+                logger.warning(f"Unable to parse repo from file path: {file}")
+                continue
+
+            repo_name = f"{match.group(1)}/{match.group(2)}"
+            if repo_name not in selected_repos:
+                # The entry's repo was deselected, so drop the entry
+                logger.debug(f"Deleting entry {file} as the repo {repo_name} is not selected anymore")
+                EntryAdapters.delete_entry_by_file(user, file)
+
def get_files(self, repo_url: str, repo: GithubRepoConfig):
# Get the contents of the repository
repo_content_url = f"{repo_url}/git/trees/{repo.branch}"
@@ -176,22 +220,37 @@ def get_files(self, repo_url: str, repo: GithubRepoConfig):
def get_file_contents(self, file_url, decode=True):
+        """
+        Download the contents of a file from GitHub.
+
+        Retries, with exponential backoff, when the download fails with a
+        chunked encoding error.
+
+        Args:
+            file_url: URL of the file to download.
+            decode: When True return the contents decoded as UTF-8 text, else raw bytes.
+
+        Returns:
+            The file contents, or an empty str/bytes value if every attempt failed.
+
+        Raises:
+            ConnectionAbortedError: When the response is not 200 and the
+                X-RateLimit-Remaining header is "0".
+        """
# Get text from each markdown file
headers = {"Accept": "application/vnd.github.v3.raw"}
- response = self.session.get(file_url, headers=headers, stream=True)
- # Stop indexing on hitting rate limit
- if response.status_code != 200 and response.headers.get("X-RateLimit-Remaining") == "0":
- raise ConnectionAbortedError("Github rate limit reached")
-
- content = "" if decode else b""
- for chunk in response.iter_content(chunk_size=2048):
- if chunk:
- try:
- content += chunk.decode("utf-8") if decode else chunk
- except Exception as e:
- logger.error(f"Unable to decode chunk from {file_url}")
- logger.error(e)
-
- return content
+        max_attempts = 3
+        for attempt in range(max_attempts):
+            try:
+                response = self.session.get(file_url, headers=headers, stream=True)
+
+                # Stop indexing on hitting rate limit
+                if response.status_code != 200 and response.headers.get("X-RateLimit-Remaining") == "0":
+                    raise ConnectionAbortedError("Github rate limit reached")
+
+                # Join all chunks before decoding. Decoding chunk by chunk fails on
+                # a multi-byte UTF-8 character split across a chunk boundary.
+                raw_content = b"".join(chunk for chunk in response.iter_content(chunk_size=2048) if chunk)
+                if not decode:
+                    return raw_content
+
+                try:
+                    return raw_content.decode("utf-8")
+                except UnicodeDecodeError as e:
+                    logger.error(f"Unable to decode chunk from {file_url}")
+                    logger.error(e)
+                    # Keep the decodable text rather than dropping the file
+                    return raw_content.decode("utf-8", errors="replace")
+            except requests.exceptions.ChunkedEncodingError as e:
+                if attempt == max_attempts - 1:
+                    # Out of attempts, so do not sleep before giving up
+                    logger.error(f"Chunked encoding error while downloading {file_url}: {e}")
+                    break
+
+                logger.error(f"Chunked encoding error while downloading {file_url}. Retrying...")
+                # Retry on chunked encoding error with exponential backoff approach
+                time.sleep(2**attempt)
+
+        logger.error(f"Failed to download file {file_url} after {max_attempts} attempts")
+        return "" if decode else b""
@staticmethod
def extract_markdown_entries(markdown_files):
diff --git a/src/khoj/routers/api_github.py b/src/khoj/routers/api_github.py
new file mode 100644
index 000000000..e9c3ce5ac
--- /dev/null
+++ b/src/khoj/routers/api_github.py
@@ -0,0 +1,222 @@
+import logging
+import os
+import secrets
+from typing import Optional
+
+import httpx
+from fastapi import APIRouter, BackgroundTasks, Request
+from fastapi.responses import JSONResponse, RedirectResponse
+from starlette.authentication import requires
+
+from khoj.database import adapters
+from khoj.database.models import GithubConfig, GithubRepoConfig, KhojUser
+from khoj.processor.content.github.github_to_entries import GithubToEntries
+
+ # Router for the GitHub integration endpoints defined in this module
+ github_router = APIRouter()
+ logger = logging.getLogger(__name__)
+
+ # GitHub OAuth app credentials, read from environment variables at import time.
+ # Each value is None when its variable is unset.
+ GITHUB_CLIENT_ID = os.getenv("GITHUB_CLIENT_ID")
+ GITHUB_CLIENT_SECRET = os.getenv("GITHUB_CLIENT_SECRET")
+ GITHUB_REDIRECT_URI = os.getenv("GITHUB_REDIRECT_URI")
+
+ # In-memory store for testing (use a database in production).
+ # Maps an OAuth `state` token to the user that started the flow.
+ # It is a plain module-level dict, so entries are not persisted or expired.
+ oauth_state_store = {}
+
+
+ def save_oauth_state(state: str, user: KhojUser) -> None:
+     """Remember `user` as the one who started the OAuth flow identified by `state`."""
+     oauth_state_store.update({state: user})
+
+
+ def get_user_id_by_oauth_state(state: str) -> Optional[KhojUser]:
+     """
+     Return the user stored for `state`, or None when the state is unknown.
+
+     The state is removed from the store on lookup, so each state resolves once.
+     Despite the name, this returns the stored user value and not an id.
+     """
+     try:
+         return oauth_state_store.pop(state)
+     except KeyError:
+         return None
+
+
+ def index_github(user: KhojUser):
+ """
+ Index the GitHub content configured for `user`.
+
+ Loads the user's first GithubConfig and, when one exists, runs
+ GithubToEntries.process on it with no files and regenerate=False.
+ """
+ config = GithubConfig.objects.filter(user=user).first()
+ if config:
+ # No local files are passed; the processor is given only the stored config
+ GithubToEntries(config).process(files={}, user=user, regenerate=False)
+ logger.info(f"Github entries indexed for user {user.id}")
+
+
+ @github_router.get("/connect")
+ @requires(["authenticated"])
+ async def connect_github(request: Request):
+     """
+     Redirect the user to GitHub's OAuth authorization page.
+
+     Generates a random `state` token, stores it against the requesting user and
+     sends it along so the callback can tell which user the authorization is for.
+
+     Returns a 401 JSON response when the user is not authenticated and a 500
+     JSON response when the OAuth client id or redirect URI is not configured.
+     """
+     from urllib.parse import urlencode
+
+     user = request.user
+     if not user.is_authenticated:
+         return JSONResponse(content={"error": "User not authenticated"}, status_code=401)
+
+     # Without these the authorize URL would carry the literal string "None"
+     if not GITHUB_CLIENT_ID or not GITHUB_REDIRECT_URI:
+         logger.error("GitHub OAuth is not configured. Set GITHUB_CLIENT_ID and GITHUB_REDIRECT_URI.")
+         return JSONResponse(content={"error": "GitHub integration is not configured"}, status_code=500)
+
+     # Generate a unique state value and remember which user it belongs to
+     state = secrets.token_urlsafe(16)
+     save_oauth_state(state, user)
+
+     # URL-encode the parameters so a redirect URI containing ?, & or # stays intact
+     query = urlencode(
+         {
+             "client_id": GITHUB_CLIENT_ID,
+             "redirect_uri": GITHUB_REDIRECT_URI,
+             "scope": "repo,user",
+             "state": state,
+         }
+     )
+     return RedirectResponse(url=f"https://github.com/login/oauth/authorize?{query}")
+
+
+ @github_router.get("/callback")
+ async def github_callback(request: Request):
+     """
+     Handle the redirect back from GitHub's OAuth authorization page.
+
+     Resolves the `state` query parameter to the user that started the flow,
+     exchanges `code` for an access token and stores it as the `pat_token` of the
+     user's GithubConfig. An existing config also has its repo selections cleared.
+
+     Redirects to the `config_page` route with `github_connected=true` on success
+     and to `/settings` on any failure.
+     """
+     code = request.query_params.get("code")
+     state = request.query_params.get("state")
+
+     if not code or not state:
+         logger.error("Missing code or state in GitHub callback")
+         return RedirectResponse(url="/settings")
+
+     # Looking up the state also removes it from the store, so it is single use
+     user = get_user_id_by_oauth_state(state)
+     if not user:
+         logger.error("Invalid or expired OAuth state")
+         return RedirectResponse(url="/settings")
+
+     # The stored user must expose the database user as `.object`
+     if not hasattr(user, "object"):
+         logger.error("OAuth state returned invalid user")
+         return RedirectResponse(url="/settings")
+
+     try:
+         async with httpx.AsyncClient() as client:
+             response = await client.post(
+                 "https://github.com/login/oauth/access_token",
+                 headers={"Accept": "application/json"},
+                 data={
+                     "client_id": GITHUB_CLIENT_ID,
+                     "client_secret": GITHUB_CLIENT_SECRET,
+                     "code": code,
+                     "redirect_uri": GITHUB_REDIRECT_URI,
+                     "state": state,
+                 },
+             )
+
+         if response.status_code != 200:
+             logger.error(f"GitHub token exchange failed: {response.text}")
+             return RedirectResponse(url="/settings")
+
+         token_data = response.json()
+         access_token = token_data.get("access_token")
+         if not access_token:
+             logger.error("No access token returned from GitHub")
+             return RedirectResponse(url="/settings")
+
+     except Exception:
+         # logger.exception records the traceback, so the exception needs no name
+         logger.exception("Exception during GitHub token exchange")
+         return RedirectResponse(url="/settings")
+
+     try:
+         # Save the GitHub access token
+         config = await GithubConfig.objects.filter(user=user.object).afirst()
+         if not config:
+             config = await GithubConfig.objects.acreate(pat_token=access_token, user=user.object)
+         else:
+             config.pat_token = access_token
+             await config.asave()
+             # Clear the repo selections made with the previous token
+             await config.githubrepoconfig.all().adelete()
+
+         logger.info(f"GitHub integration successfully set up for user {user.object.id}")
+         settings_redirect = str(request.app.url_path_for("config_page"))
+
+         logger.info(f"Redirecting to Settings config page: {settings_redirect}")
+
+         return RedirectResponse(settings_redirect + "?github_connected=true")
+
+     except Exception:
+         logger.exception("Failed to save GitHub configuration")
+         return RedirectResponse(url="/settings")
+
+
+ @github_router.get("/repos")
+ @requires(["authenticated"])
+ async def list_user_repos(request: Request):
+     """
+     List the GitHub repositories visible to the authenticated user.
+
+     Fetches the repositories from the GitHub API with the stored token and marks
+     each one with whether it is currently selected in the user's GithubConfig.
+
+     Returns a list of dicts with `name`, `owner`, `branch`, `full_name`,
+     `description`, `private` and `selected`. Returns a JSON error with status 401
+     when not authenticated, 400 when GitHub is not connected, or GitHub's own
+     status code when fetching the repositories fails.
+     """
+     user = request.user
+     if not user.is_authenticated:
+         return JSONResponse({"error": "Not authenticated"}, status_code=401)
+
+     config = await GithubConfig.objects.filter(user=user.object).prefetch_related("githubrepoconfig").afirst()
+     if not config:
+         return JSONResponse({"error": "GitHub not connected"}, status_code=400)
+
+     logger.debug(f"GitHub config for user {user.object.id}: config: {config.id}")
+
+     # Use a set so the `selected` lookup per repo is O(1)
+     selected_repos = {f"{repo.owner}/{repo.name}" for repo in config.githubrepoconfig.all()}
+     logger.debug(f"Repos from DB: {sorted(selected_repos)}")
+
+     # GitHub paginates this endpoint (30 per page by default, 100 at most).
+     # Walk the pages so repos past the first page are listed too.
+     per_page = 100
+     max_pages = 10  # upper bound on requests made for a single listing
+     headers = {"Authorization": f"token {config.pat_token}"}
+     repos = []
+     async with httpx.AsyncClient() as client:
+         for page in range(1, max_pages + 1):
+             response = await client.get(
+                 "https://api.github.com/user/repos",
+                 headers=headers,
+                 params={"per_page": per_page, "page": page},
+             )
+
+             if response.status_code != 200:
+                 return JSONResponse(
+                     {"error": "Failed to fetch repos", "detail": response.text}, status_code=response.status_code
+                 )
+
+             page_repos = response.json()
+             repos.extend(page_repos)
+             # A page that is not full is the last one
+             if len(page_repos) < per_page:
+                 break
+
+     return [
+         {
+             "name": r["name"],
+             "owner": r["owner"]["login"],
+             "branch": r["default_branch"],
+             "full_name": r["full_name"],
+             "description": r.get("description"),
+             "private": r.get("private", False),
+             "selected": r["full_name"] in selected_repos,
+         }
+         for r in repos
+     ]
+
+
+ @github_router.post("/repos/select")
+ @requires(["authenticated"])
+ async def select_user_repos(request: Request, background_tasks: BackgroundTasks):
+     """
+     Replace the user's selected GitHub repositories and schedule indexing.
+
+     Expects a JSON body of the form
+     `{"repos": [{"name": ..., "owner": ..., "branch": ...}, ...]}`.
+     The payload is validated before the old selections are deleted, so a
+     malformed request leaves the current selections untouched.
+
+     Returns `{"status": "success", "count": <number of repos>}` on success.
+     Returns a JSON error with status 401 when not authenticated, or 400 when the
+     body is invalid, no repos are given or GitHub is not connected.
+     """
+     user = request.user
+     if not user.is_authenticated:
+         return JSONResponse({"error": "Not authenticated"}, status_code=401)
+
+     try:
+         body = await request.json()
+     except ValueError:
+         # json.JSONDecodeError is a subclass of ValueError
+         return JSONResponse({"error": "Invalid JSON body"}, status_code=400)
+
+     repos = body.get("repos", []) if isinstance(body, dict) else []
+     if not repos:
+         return JSONResponse({"error": "No repositories provided"}, status_code=400)
+
+     # Validate every repo up front so nothing is deleted for a bad request
+     required_fields = ("name", "owner", "branch")
+     for repo in repos:
+         if not isinstance(repo, dict) or any(
+             not isinstance(repo.get(field), str) or not repo[field] for field in required_fields
+         ):
+             return JSONResponse(
+                 {"error": "Each repository needs a name, owner and branch"}, status_code=400
+             )
+
+     config = await GithubConfig.objects.filter(user=user.object).afirst()
+     if not config:
+         return JSONResponse({"error": "GitHub not connected"}, status_code=400)
+
+     await config.githubrepoconfig.all().adelete()  # clear old selections
+
+     for repo in repos:
+         await GithubRepoConfig.objects.acreate(
+             name=repo["name"], owner=repo["owner"], branch=repo["branch"], github_config=config
+         )
+
+     # Trigger an async job to index_github. Let it run without blocking the response.
+     background_tasks.add_task(index_github, user.object)
+
+     return {"status": "success", "count": len(repos)}
+
+
+ @github_router.delete("/disconnect")
+ @requires(["authenticated"])
+ async def disconnect_github(request: Request):
+     """
+     Disconnect the GitHub integration for the authenticated user.
+
+     Deletes the user's GithubConfig and redirects to the `config_page` route
+     with `github_connected=false`. Returns a 401 JSON response when the user is
+     not authenticated.
+     """
+     user = request.user
+     if not user.is_authenticated:
+         return JSONResponse(content={"error": "User not authenticated"}, status_code=401)
+
+     # Delete the GitHub configuration for the user
+     await GithubConfig.objects.filter(user=user.object).adelete()
+
+     logger.info(f"GitHub integration disconnected for user {user.object.id}")
+     settings_redirect = str(request.app.url_path_for("config_page"))
+
+     # NOTE(review): RedirectResponse defaults to 307, which keeps the DELETE method
+     # on the follow-up request. Confirm the client expects that, or use 303.
+     return RedirectResponse(settings_redirect + "?github_connected=false")