Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@ node_modules/
apps/openant-cli/bin/
libs/openant-core/parsers/go/go_parser/go_parser
# docs/
docs/
.worktrees/
182 changes: 164 additions & 18 deletions apps/openant-cli/cmd/init.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package cmd

import (
"encoding/json"
"fmt"
"io/fs"
"os"
"os/exec"
"path/filepath"
Expand All @@ -21,12 +23,16 @@ var initCmd = &cobra.Command{
For remote URLs, the repo is cloned into ~/.openant/projects/{org}/{repo}/repo/.
For local paths, the existing directory is referenced in place (no cloning).

If --language is not specified, the dominant language is auto-detected by
counting source files in the repository.

After init, all commands (parse, scan, etc.) work without path arguments.

Examples:
openant init https://github.com/grafana/grafana
openant init https://github.com/grafana/grafana -l go
openant init https://github.com/grafana/grafana -l go --commit 591ceb2eec0
openant init ./repos/grafana -l go
openant init ./repos/grafana
openant init ./repos/grafana -l go --name myorg/grafana`,
Args: cobra.ExactArgs(1),
Run: runInit,
Expand All @@ -44,15 +50,14 @@ var (
)

// init registers the command-line flags accepted by `openant init`.
//
// Each flag is registered exactly once; defining the same flag name twice on
// one flag set panics at start-up. --language is intentionally NOT marked as
// required: when it is empty or "auto", runInit detects the dominant language
// of the repository itself.
func init() {
	flags := initCmd.Flags()

	flags.StringVarP(&initLanguage, "language", "l", "", "Language to analyze: python, javascript, go, c, ruby, php, zig, auto (default: auto-detect)")
	flags.StringVar(&initCommit, "commit", "", "Specific commit SHA (default: HEAD)")
	flags.StringVar(&initName, "name", "", "Override project name (default: derived from URL/path)")

	// Scan-mode selection flags.
	flags.BoolVar(&initFull, "full", false, "Force full scan (rejects --incremental/--diff-base/--pr)")
	flags.BoolVar(&initIncremental, "incremental", false, "Incremental against the last successful scan on this project")
	flags.StringVar(&initDiffBase, "diff-base", "", "Incremental against this ref (e.g. origin/main, HEAD~5)")
	flags.IntVar(&initPR, "pr", 0, "Incremental against a GitHub PR number (requires gh; mutex with --diff-base)")
	flags.StringVar(&initDiffScope, "diff-scope", "", "Diff scope: changed_files, changed_functions, callers (default changed_functions)")
}

func runInit(cmd *cobra.Command, args []string) {
Expand Down Expand Up @@ -118,7 +123,7 @@ func runInit(cmd *cobra.Command, args []string) {
}
}
} else {
// Local: verify it's a git repo and resolve absolute path
// Local: resolve absolute path
source = "local"

absPath, err := filepath.Abs(input)
Expand All @@ -127,29 +132,48 @@ func runInit(cmd *cobra.Command, args []string) {
os.Exit(1)
}

if _, err := os.Stat(filepath.Join(absPath, ".git")); err != nil {
output.PrintError(fmt.Sprintf("%s is not a git repository (no .git directory)", absPath))
repoPath = absPath
}

// Auto-detect language if not specified
if initLanguage == "" || initLanguage == "auto" {
fmt.Fprintf(os.Stderr, "Auto-detecting language...\n")
detected, err := detectLanguage(repoPath)
if err != nil {
output.PrintError(fmt.Sprintf("Language auto-detection failed: %s\nSpecify manually with -l/--language", err))
os.Exit(1)
}
initLanguage = detected
fmt.Fprintf(os.Stderr, "Detected language: %s\n", initLanguage)
}

repoPath = absPath
// Get commit SHA (best-effort — not all local paths are git repos)
isGit := false
if _, err := os.Stat(filepath.Join(repoPath, ".git")); err == nil {
isGit = true
}

// Get commit SHA
commitSHA := initCommit
if commitSHA == "" {
out, err := exec.Command("git", "-C", repoPath, "rev-parse", "HEAD").Output()
if err != nil {
output.PrintError(fmt.Sprintf("Failed to get HEAD commit: %s", err))
os.Exit(1)
if isGit {
if commitSHA == "" {
out, err := exec.Command("git", "-C", repoPath, "rev-parse", "HEAD").Output()
if err != nil {
output.PrintError(fmt.Sprintf("Failed to get HEAD commit: %s", err))
os.Exit(1)
}
commitSHA = strings.TrimSpace(string(out))
} else {
// Resolve short SHA to full SHA
out, err := exec.Command("git", "-C", repoPath, "rev-parse", commitSHA).Output()
if err == nil {
commitSHA = strings.TrimSpace(string(out))
}
}
commitSHA = strings.TrimSpace(string(out))
} else {
// Resolve short SHA to full SHA
out, err := exec.Command("git", "-C", repoPath, "rev-parse", commitSHA).Output()
if err == nil {
commitSHA = strings.TrimSpace(string(out))
if commitSHA != "" {
output.PrintWarning("--commit ignored: not a git repository")
}
commitSHA = "nogit"
}

// Create project
Expand Down Expand Up @@ -224,3 +248,125 @@ func runInit(cmd *cobra.Command, args []string) {
output.PrintSuccess("Set as active project")
fmt.Println()
}

// languagesConfig mirrors the JSON document stored at config/languages.json.
type languagesConfig struct {
	// SkipDirs holds the directory names listed under "skip_dirs".
	SkipDirs []string `json:"skip_dirs"`

	// Extensions holds the "extensions" object: file extension (including
	// the leading dot, e.g. ".go") mapped to a language name.
	Extensions map[string]string `json:"extensions"`
}

// findLanguagesConfig locates config/languages.json.
//
// Two starting points are tried in order: the directory containing the running
// executable (with symlinks resolved when possible), then the current working
// directory. From each starting point the directory itself and its ancestors
// are checked, six directories in total. The first candidate that exists and
// is not a directory wins.
//
// It returns the path of the config file, or an error when neither search
// finds one.
func findLanguagesConfig() (string, error) {
	// Number of directories inspected per starting point (the start itself
	// plus five ancestors).
	const maxLevels = 6

	rel := filepath.Join("config", "languages.json")

	var starts []string

	// Strategy 1: walk up from the executable.
	if exePath, err := os.Executable(); err == nil {
		// Fall back to the unresolved path when symlink resolution fails;
		// discarding the error would leave an empty path and silently turn
		// this strategy into a search relative to ".".
		if resolved, err := filepath.EvalSymlinks(exePath); err == nil {
			exePath = resolved
		}
		starts = append(starts, filepath.Dir(exePath))
	}

	// Strategy 2: walk up from CWD.
	if cwd, err := os.Getwd(); err == nil {
		starts = append(starts, cwd)
	}

	for _, start := range starts {
		if found, ok := findUpward(start, rel, maxLevels); ok {
			return found, nil
		}
	}

	return "", fmt.Errorf("could not find config/languages.json from executable or working directory")
}

// findUpward checks start and then its ancestors, at most levels directories
// in total, for the relative path rel and reports the first non-directory match.
func findUpward(start, rel string, levels int) (string, bool) {
	dir := start
	for i := 0; i < levels; i++ {
		candidate := filepath.Join(dir, rel)
		if info, err := os.Stat(candidate); err == nil && !info.IsDir() {
			return candidate, true
		}
		parent := filepath.Dir(dir)
		if parent == dir {
			// Reached the filesystem root; nothing further to check.
			break
		}
		dir = parent
	}
	return "", false
}

// loadLanguagesConfig reads and decodes the shared language detection config.
//
// The file is located with findLanguagesConfig; a lookup failure is returned
// unchanged, while read and JSON decoding failures are wrapped with the path
// of the offending file.
func loadLanguagesConfig() (*languagesConfig, error) {
	cfgPath, err := findLanguagesConfig()
	if err != nil {
		return nil, err
	}

	raw, readErr := os.ReadFile(cfgPath)
	if readErr != nil {
		return nil, fmt.Errorf("failed to read %s: %w", cfgPath, readErr)
	}

	parsed := new(languagesConfig)
	if decodeErr := json.Unmarshal(raw, parsed); decodeErr != nil {
		return nil, fmt.Errorf("failed to parse %s: %w", cfgPath, decodeErr)
	}
	return parsed, nil
}

// detectLanguage walks a repository and returns the dominant language by file count.
// Extension mappings and skip directories are loaded from config/languages.json
// (shared with libs/openant-core/core/parser_adapter.py::detect_language()).
//
// Every non-directory entry whose lower-cased extension appears in the config
// counts once toward the mapped language. Directories whose name is listed in
// skip_dirs are not descended into; the root itself is never skipped, so a
// repository that happens to live in a directory called e.g. "build" or
// "vendor" is still scanned. When several languages share the highest count,
// the alphabetically smallest name is returned so the result is stable
// across runs.
//
// An error is returned when the config cannot be loaded, when repoPath itself
// cannot be read, or when no file with a configured extension is found.
func detectLanguage(repoPath string) (string, error) {
	cfg, err := loadLanguagesConfig()
	if err != nil {
		return "", fmt.Errorf("failed to load language config: %w", err)
	}

	skipDirs := make(map[string]struct{}, len(cfg.SkipDirs))
	for _, name := range cfg.SkipDirs {
		skipDirs[name] = struct{}{}
	}

	counts := make(map[string]int)

	err = filepath.WalkDir(repoPath, func(path string, d fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			if path == repoPath {
				// The repository root is missing or unreadable: report the
				// real cause instead of "no supported source files".
				return walkErr
			}
			return nil // skip inaccessible paths
		}
		if d.IsDir() {
			// Only prune directories below the root; the root's own name
			// says nothing about its contents.
			if _, skip := skipDirs[d.Name()]; skip && path != repoPath {
				return filepath.SkipDir
			}
			return nil
		}

		ext := strings.ToLower(filepath.Ext(d.Name()))
		if lang, ok := cfg.Extensions[ext]; ok {
			counts[lang]++
		}
		return nil
	})
	if err != nil {
		return "", fmt.Errorf("failed to walk repository: %w", err)
	}

	// Find the dominant language. Map iteration order is random, so ties are
	// broken by name to keep the outcome deterministic.
	bestLang := ""
	bestCount := 0
	for lang, count := range counts {
		if count > bestCount || (count == bestCount && lang < bestLang) {
			bestCount = count
			bestLang = lang
		}
	}

	if bestLang == "" {
		return "", fmt.Errorf(
			"no supported source files found in %s. "+
				"Supported languages: Python, JavaScript/TypeScript, Go, C/C++, Ruby, PHP, Zig",
			repoPath,
		)
	}

	return bestLang, nil
}
34 changes: 34 additions & 0 deletions config/languages.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"skip_dirs": [
"node_modules",
"__pycache__",
"venv",
".venv",
"dist",
"build",
".git",
"vendor"
],
"extensions": {
".py": "python",
".js": "javascript",
".ts": "javascript",
".jsx": "javascript",
".tsx": "javascript",
".mjs": "javascript",
".cjs": "javascript",
".go": "go",
".c": "c",
".h": "c",
".cpp": "c",
".hpp": "c",
".cc": "c",
".cxx": "c",
".hxx": "c",
".hh": "c",
".rb": "ruby",
".rake": "ruby",
".php": "php",
".zig": "zig"
}
}
47 changes: 23 additions & 24 deletions libs/openant-core/core/parser_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,46 +20,45 @@
# Root of openant-core (where parsers/ lives): two directory levels above
# this file.
_CORE_ROOT = Path(__file__).parent.parent

# Shared language detection config (single source of truth: config/languages.json).
# Resolved relative to this file: four directory levels up, then
# config/languages.json.
_LANGUAGES_CONFIG = Path(__file__).parent.parent.parent.parent / "config" / "languages.json"


def _load_language_config() -> dict:
    """Load language detection config from the shared config/languages.json.

    Returns:
        The parsed JSON document. detect_language() reads its "skip_dirs"
        and "extensions" entries.

    Raises:
        OSError: if the file at _LANGUAGES_CONFIG cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # Decode explicitly as UTF-8 (the standard JSON encoding) rather than the
    # platform's locale encoding, which open() would otherwise use.
    with open(_LANGUAGES_CONFIG, encoding="utf-8") as f:
        return json.load(f)


def detect_language(repo_path: str) -> str:
"""Auto-detect the primary language of a repository.

Counts source files by extension and returns the dominant language.
Extension mappings and skip directories are loaded from config/languages.json.

Returns:
"python", "javascript", or "go"
One of: "python", "javascript", "go", "c", "ruby", "php", "zig"
"""
config = _load_language_config()
skip_dirs = set(config["skip_dirs"])
extensions = config["extensions"]

repo = Path(repo_path)
counts = {"python": 0, "javascript": 0, "go": 0, "c": 0, "ruby": 0, "php": 0, "zig": 0}
counts: dict[str, int] = {}

for f in repo.rglob("*"):
if not f.is_file():
continue
# Skip common non-source dirs
parts = f.parts
if any(p in parts for p in (
"node_modules", "__pycache__", "venv", ".venv",
"dist", "build", ".git", "vendor",
)):
# Skip configured non-source dirs
if any(p in skip_dirs for p in f.parts):
continue

suffix = f.suffix.lower()
if suffix == ".py":
counts["python"] += 1
elif suffix in (".js", ".ts", ".jsx", ".tsx", ".mjs", ".cjs"):
counts["javascript"] += 1
elif suffix == ".go":
counts["go"] += 1
elif suffix in (".c", ".h", ".cpp", ".hpp", ".cc", ".cxx", ".hxx", ".hh"):
counts["c"] += 1
elif suffix in (".rb", ".rake"):
counts["ruby"] += 1
elif suffix == ".php":
counts["php"] += 1
elif suffix == ".zig":
counts["zig"] += 1

if not any(counts.values()):
if suffix in extensions:
lang = extensions[suffix]
counts[lang] = counts.get(lang, 0) + 1

if not counts:
raise ValueError(
f"No supported source files found in {repo_path}. "
"Supported languages: Python, JavaScript/TypeScript, Go, C/C++, Ruby, PHP, Zig."
Expand Down
Loading
Loading